*/
int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
{
- __do_close int dfd_path = -EBADF, fd_pin = -EBADF;
+ __do_close int dfd_path = -EBADF, fd_pin = -EBADF, fd_userns = -EBADF;
int ret;
struct stat st;
struct statfs stfs;
+ if (!is_empty_string(rootfs->mnt_opts.userns_path)) {
+ fd_userns = open_at(-EBADF, rootfs->mnt_opts.userns_path,
+ PROTECT_OPEN_WITH_TRAILING_SYMLINKS, 0, 0);
+ if (fd_userns < 0)
+ return syserror("Failed to open user namespace");
+ }
+
if (rootfs->path) {
if (rootfs->bdev_type &&
(strequal(rootfs->bdev_type, "overlay") ||
dfd_path = open_at(-EBADF, "/", PROTECT_OPATH_FILE, PROTECT_LOOKUP_ABSOLUTE, 0);
}
if (dfd_path < 0)
- return log_error_errno(-errno, errno, "Failed to open \"%s\"", rootfs->path);
+ return syserror("Failed to open \"%s\"", rootfs->path);
- if (!rootfs->path)
- return log_trace(0, "Not pinning because container does not have a rootfs");
+ if (!rootfs->path) {
+ TRACE("Not pinning because container does not have a rootfs");
+ goto out;
+ }
- if (userns)
- return log_trace(0, "Not pinning because container runs in user namespace");
+ if (userns) {
+ TRACE("Not pinning because container runs in user namespace");
+ goto out;
+ }
ret = fstat(dfd_path, &st);
if (ret < 0)
PROTECT_LOOKUP_BENEATH,
S_IWUSR | S_IRUSR);
if (fd_pin < 0)
- return log_error_errno(-errno, errno, "Failed to pin rootfs");
+ return syserror("Failed to pin rootfs");
TRACE("Pinned rootfs %d(.lxc_keep)", fd_pin);
out:
rootfs->fd_path_pin = move_fd(fd_pin);
+ rootfs->mnt_opts.userns_fd = move_fd(fd_userns);
return 0;
}
/* Remove "optional", "create=dir", and "create=file" from mntopt */
int parse_lxc_mntopts(struct lxc_mount_options *opts, char *mnt_opts)
{
+ __do_close int fd_userns = -EBADF;
for (size_t i = LXC_MOUNT_CREATE_DIR; i < LXC_MOUNT_MAX; i++) {
const char *opt_name = lxc_mount_options_info[i];
if (is_empty_string(opts->userns_path))
return syserror_set(-EINVAL, "Missing idmap path for \"idmap=<path>\" LXC specific mount option");
- TRACE("Parse LXC specific mount option \"idmap=%s\"", opts->userns_path);
+ close_prot_errno_disarm(fd_userns);
+ fd_userns = open(opts->userns_path, O_RDONLY | O_NOCTTY | O_CLOEXEC);
+ if (fd_userns < 0)
+ return syserror("Failed to open user namespace");
+
+ TRACE("Parse LXC specific mount option %d->\"idmap=%s\"", fd_userns, opts->userns_path);
break;
default:
return syserror_set(-EINVAL, "Unknown LXC specific mount option");
new->rootfs.dfd_dev = -EBADF;
new->rootfs.dfd_host = -EBADF;
new->rootfs.fd_path_pin = -EBADF;
+ new->rootfs.mnt_opts.userns_fd = -EBADF;
new->logfd = -1;
lxc_list_init(&new->cgroup);
lxc_list_init(&new->cgroup2);
int optional : 1;
int relative : 1;
char userns_path[PATH_MAX];
+ int userns_fd;
};
/* Defines a structure to store the rootfs location, the
return !is_empty_string(rootfs->path) ? rootfs->mount : s;
}
+/* Tell whether an idmapped mount was requested for the rootfs.
+ *
+ * The test is purely on the stored descriptor: a non-negative
+ * @rootfs->mnt_opts.userns_fd counts as "idmapped". The field holds a
+ * negative value (-EBADF) until lxc_rootfs_prepare() opens the configured
+ * user namespace path and stores the resulting fd there.
+ *
+ * @rootfs must not be NULL; it is dereferenced unconditionally.
+ */
+static inline bool idmapped_rootfs_mnt(const struct lxc_rootfs *rootfs)
+{
+ return rootfs->mnt_opts.userns_fd >= 0;
+}
+
static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin)
{
if (rootfs) {
close_prot_errno_disarm(rootfs->dfd_host);
close_prot_errno_disarm(rootfs->dfd_mnt);
close_prot_errno_disarm(rootfs->dfd_dev);
+ close_prot_errno_disarm(rootfs->mnt_opts.userns_fd);
if (unpin)
close_prot_errno_disarm(rootfs->fd_path_pin);
}
return 0;
}
-int fd_bind_mount(int dfd_from, const char *path_from,
- __u64 o_flags_from, __u64 resolve_flags_from,
- int dfd_to, const char *path_to,
- __u64 o_flags_to, __u64 resolve_flags_to,
- unsigned int attr_flags, bool recursive)
+static int __fd_bind_mount(int dfd_from, const char *path_from,
+ __u64 o_flags_from, __u64 resolve_flags_from,
+ int dfd_to, const char *path_to, __u64 o_flags_to,
+ __u64 resolve_flags_to, unsigned int attr_flags,
+ int userns_fd, bool recursive)
{
+ struct lxc_mount_attr attr = {
+ .attr_set = attr_flags,
+ };
__do_close int __fd_from = -EBADF, __fd_to = -EBADF;
__do_close int fd_tree_from = -EBADF;
unsigned int open_tree_flags = AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC;
fd_tree_from = open_tree(fd_from, "", open_tree_flags);
if (fd_tree_from < 0)
- return log_error_errno(-errno, errno, "Failed to create detached mount");
+ return syserror("Failed to create detached mount");
+
+ if (userns_fd >= 0) {
+ attr.attr_set |= MOUNT_ATTR_IDMAP;
+ attr.userns_fd = userns_fd;
+ }
+
+ if (attr.attr_set) {
+ ret = mount_setattr(fd_tree_from, "",
+ AT_EMPTY_PATH | AT_RECURSIVE,
+ &attr, sizeof(attr));
+ if (ret < 0)
+ return syserror("Failed to change mount attributes");
+ }
if (!is_empty_string(path_to)) {
struct lxc_open_how how = {
ret = move_mount(fd_tree_from, "", fd_to, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH);
if (ret)
- return log_error_errno(-errno, errno, "Failed to attach detached mount %d to filesystem at %d", fd_tree_from, fd_to);
+ return syserror("Failed to attach detached mount %d to filesystem at %d", fd_tree_from, fd_to);
TRACE("Attach detached mount %d to filesystem at %d", fd_tree_from, fd_to);
return 0;
}
+/* Create a detached bind mount and attach it at the target, optionally as an
+ * idmapped mount.
+ *
+ * Thin wrapper: every argument is forwarded unchanged to __fd_bind_mount().
+ *
+ * @attr_flags  mount attributes to set on the detached mount (used as
+ *              lxc_mount_attr.attr_set).
+ * @userns_fd   when >= 0, __fd_bind_mount() additionally sets
+ *              MOUNT_ATTR_IDMAP and passes this fd to mount_setattr(); a
+ *              negative value yields a plain bind mount.
+ * @recursive   forwarded as-is.
+ *              NOTE(review): the visible mount_setattr() call in
+ *              __fd_bind_mount() always passes AT_RECURSIVE, independent of
+ *              this flag - confirm that is intended.
+ *
+ * The remaining parameters describe how source and target are opened; their
+ * handling is in the part of __fd_bind_mount() not shown here.
+ *
+ * Returns 0 on success; on failure the negative value produced by
+ * __fd_bind_mount() is returned.
+ */
+int fd_mount_idmapped(int dfd_from, const char *path_from,
+ __u64 o_flags_from, __u64 resolve_flags_from,
+ int dfd_to, const char *path_to,
+ __u64 o_flags_to, __u64 resolve_flags_to,
+ unsigned int attr_flags, int userns_fd, bool recursive)
+{
+ return __fd_bind_mount(dfd_from, path_from, o_flags_from, resolve_flags_from,
+ dfd_to, path_to, o_flags_to, resolve_flags_to,
+ attr_flags, userns_fd, recursive);
+}
+
+/* Create a detached bind mount and attach it at the target, without any
+ * idmapping.
+ *
+ * Keeps the pre-existing public signature and delegates to
+ * __fd_bind_mount(), passing -EBADF as the user namespace fd so that the
+ * "userns_fd >= 0" branch there (which sets MOUNT_ATTR_IDMAP) is not taken.
+ * All other arguments are forwarded unchanged.
+ *
+ * Returns 0 on success; on failure the negative value produced by
+ * __fd_bind_mount() is returned.
+ */
+int fd_bind_mount(int dfd_from, const char *path_from,
+ __u64 o_flags_from, __u64 resolve_flags_from,
+ int dfd_to, const char *path_to,
+ __u64 o_flags_to, __u64 resolve_flags_to,
+ unsigned int attr_flags, bool recursive)
+{
+ return __fd_bind_mount(dfd_from, path_from, o_flags_from, resolve_flags_from,
+ dfd_to, path_to, o_flags_to, resolve_flags_to,
+ attr_flags, -EBADF, recursive);
+}
+
int calc_remount_flags_new(int dfd_from, const char *path_from,
__u64 o_flags_from, __u64 resolve_flags_from,
bool remount, unsigned long cur_flags,
__u64 o_flags_to, __u64 resolve_flags_to,
unsigned int attr_flags, bool recursive);
+/* Same as fd_bind_mount() but takes an extra @userns_fd; when that fd is
+ * >= 0 the mount is created with MOUNT_ATTR_IDMAP using that user namespace.
+ */
+__hidden extern int fd_mount_idmapped(int dfd_from, const char *path_from,
+ __u64 o_flags_from, __u64 resolve_flags_from,
+ int dfd_to, const char *path_to,
+ __u64 o_flags_to, __u64 resolve_flags_to,
+ unsigned int attr_flags, int userns_fd,
+ bool recursive);
+
__hidden extern int calc_remount_flags_new(int dfd_from, const char *path_from,
__u64 o_flags_from,
__u64 resolve_flags_from,
goto out_delete_net;
}
- /* If the rootfs is not a blockdev, prevent the container from marking
- * it readonly.
- * If the container is unprivileged then skip rootfs pinning.
- */
- ret = lxc_rootfs_prepare(&conf->rootfs, wants_to_map_ids);
- if (ret) {
- ERROR("Failed to handle rootfs pinning for container \"%s\"", handler->name);
- goto out_delete_net;
- }
-
/* Create a process in a new set of namespaces. */
if (share_ns) {
pid_t attacher_pid;
goto out_abort;
}
+ /* If the rootfs is not a blockdev, prevent the container from marking
+ * it readonly.
+ * If the container is unprivileged then skip rootfs pinning.
+ */
+ ret = lxc_rootfs_prepare(&conf->rootfs, !lxc_list_empty(&conf->id_map));
+ if (ret) {
+ ERROR("Failed to handle rootfs pinning for container \"%s\"", handler->name);
+ ret = -1;
+ goto out_abort;
+ }
+
if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
/* If the backing store is a device, mount it here and now. */
- if (rootfs_is_blockdev(conf)) {
+ if (idmapped_rootfs_mnt(&conf->rootfs) || rootfs_is_blockdev(conf)) {
ret = unshare(CLONE_NEWNS);
if (ret < 0) {
ERROR("Failed to unshare CLONE_NEWNS");
src = lxc_storage_get_path(bdev->src, bdev->type);
- ret = mount(src, bdev->dest, "bind", MS_BIND | MS_REC | mntflags | pflags, mntdata);
- if (ret < 0)
- return log_error_errno(-errno, errno, "Failed to mount \"%s\" on \"%s\"", src, bdev->dest);
-
- if (ret == 0 && (mntflags & MS_RDONLY)) {
- mflags = add_required_remount_flags(src, bdev->dest, MS_BIND | MS_REC | mntflags | pflags | MS_REMOUNT);
- ret = mount(src, bdev->dest, "bind", mflags, mntdata);
+ if (can_use_bind_mounts()) {
+ __do_close int fd_source = -EBADF, fd_target = -EBADF;
+
+ fd_source = open_at(-EBADF, src, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_ABSOLUTE, 0);
+ if (fd_source < 0)
+ return syserror("Failed to open \"%s\"", src);
+
+ fd_target = open_at(-EBADF, bdev->dest, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_ABSOLUTE, 0);
+ if (fd_target < 0)
+ return syserror("Failed to open \"%s\"", bdev->dest);
+
+ ret = fd_mount_idmapped(fd_source, "", PROTECT_OPATH_DIRECTORY,
+ PROTECT_LOOKUP_BENEATH, fd_target, "",
+ PROTECT_OPATH_DIRECTORY,
+ PROTECT_LOOKUP_BENEATH, 0,
+ bdev->rootfs->mnt_opts.userns_fd, true);
+ if (ret < 0)
+ return syserror("Failed to mount \"%s\" onto \"%s\"", src, bdev->dest);
+ } else {
+ ret = mount(src, bdev->dest, "bind", MS_BIND | MS_REC | mntflags | pflags, mntdata);
if (ret < 0)
- return log_error_errno(-errno, errno, "Failed to remount \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
- src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
- else
- DEBUG("Remounted \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
- src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
+ return log_error_errno(-errno, errno, "Failed to mount \"%s\" on \"%s\"", src, bdev->dest);
+
+ if (ret == 0 && (mntflags & MS_RDONLY)) {
+ mflags = add_required_remount_flags(src, bdev->dest, MS_BIND | MS_REC | mntflags | pflags | MS_REMOUNT);
+
+ ret = mount(src, bdev->dest, "bind", mflags, mntdata);
+ if (ret < 0)
+ return log_error_errno(-errno, errno, "Failed to remount \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
+ src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
+ else
+ DEBUG("Remounted \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
+ src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
+ }
+
+ TRACE("Mounted \"%s\" on \"%s\" with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
+ src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
}
- TRACE("Mounted \"%s\" on \"%s\" with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"",
- src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags);
+ TRACE("Mounted \"%s\" onto \"%s\"", src, bdev->dest);
return 0;
}