From 79ff643d24593a1b77bb39233219d55d20efa4bc Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 8 Feb 2021 11:24:57 +0100 Subject: [PATCH] conf: rework rootfs pinning Signed-off-by: Christian Brauner --- src/lxc/conf.c | 91 ++++++++++++++++++++++++++++--------------------- src/lxc/conf.h | 22 ++++++++++-- src/lxc/start.c | 18 ++++------ src/lxc/start.h | 3 -- 4 files changed, 78 insertions(+), 56 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 0f2f68bc7..ef6ad00ee 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -477,56 +477,74 @@ int run_script(const char *name, const char *section, const char *script, ...) return run_buffer(buffer); } -/* pin_rootfs +/* lxc_rootfs_prepare * if rootfs is a directory, then open ${rootfs}/.lxc-keep for writing for * the duration of the container run, to prevent the container from marking * the underlying fs readonly on shutdown. unlink the file immediately so * no name pollution happens. * don't unlink on NFS to avoid random named stale handles. - * return -1 on error. - * return -2 if nothing needed to be pinned. - * return an open fd (>=0) if we pinned it. 
*/ -int pin_rootfs(const char *rootfs) +int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns) { - __do_free char *absrootfs = NULL; - int fd, ret; - char absrootfspin[PATH_MAX]; - struct stat s; - struct statfs sfs; + __do_close int dfd_path = -EBADF, fd_pin = -EBADF; + int ret; + struct stat st; + struct statfs stfs; - if (rootfs == NULL || strlen(rootfs) == 0) - return -2; + if (rootfs->path) { + if (rootfs->bdev_type && + (!strcmp(rootfs->bdev_type, "overlay") || + !strcmp(rootfs->bdev_type, "overlayfs"))) + return log_trace_errno(0, EINVAL, "Not pinning on stacking filesystem"); - absrootfs = realpath(rootfs, NULL); - if (!absrootfs) - return -2; + dfd_path = open_at(-EBADF, rootfs->path, PROTECT_OPATH_FILE, 0, 0); + } else { + dfd_path = open_at(-EBADF, "/", PROTECT_OPATH_FILE, PROTECT_LOOKUP_ABSOLUTE, 0); + } + if (dfd_path < 0) + return log_error_errno(-errno, errno, "Failed to open \"%s\"", rootfs->path); + + if (!rootfs->path) + return log_trace(0, "Not pinning because container does not have a rootfs"); - ret = stat(absrootfs, &s); + if (userns) + return log_trace(0, "Not pinning because container runs in user namespace"); + + ret = fstat(dfd_path, &st); if (ret < 0) - return -1; + return log_trace_errno(-errno, errno, "Failed to retrieve file status"); - if (!S_ISDIR(s.st_mode)) - return -2; + if (!S_ISDIR(st.st_mode)) + return log_trace_errno(0, ENOTDIR, "Not pinning because file descriptor is not a directory"); - ret = snprintf(absrootfspin, sizeof(absrootfspin), "%s/.lxc-keep", absrootfs); - if (ret < 0 || (size_t)ret >= sizeof(absrootfspin)) - return -1; + fd_pin = open_at(dfd_path, ".lxc_keep", + PROTECT_OPEN | O_CREAT, + PROTECT_LOOKUP_BENEATH, + S_IWUSR | S_IRUSR); + if (fd_pin < 0) + return log_error_errno(-errno, errno, "Failed to pin rootfs"); - fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR | S_IRUSR | O_CLOEXEC); - if (fd < 0) - return fd; + TRACE("Pinned rootfs %d(.lxc_keep)", fd_pin); - ret = fstatfs (fd, &sfs); - if (ret < 0) - 
return fd; + ret = fstatfs(fd_pin, &stfs); + if (ret < 0) { + SYSWARN("Failed to retrieve filesystem status"); + goto out; + } - if (sfs.f_type == NFS_SUPER_MAGIC) - return log_debug(fd, "Rootfs on NFS, not unlinking pin file \"%s\"", absrootfspin); + if (stfs.f_type == NFS_SUPER_MAGIC) { + DEBUG("Not unlinking pinned file on NFS"); + goto out; + } - (void)unlink(absrootfspin); + if (unlinkat(dfd_path, ".lxc_keep", 0)) + SYSTRACE("Failed to unlink rootfs pinning file %d(.lxc_keep)", dfd_path); + else + TRACE("Unlinked pinned file %d(.lxc_keep)", dfd_path); - return fd; +out: + rootfs->fd_path_pin = move_fd(fd_pin); + return 0; } static int add_shmount_to_list(struct lxc_conf *conf) @@ -2585,6 +2603,7 @@ struct lxc_conf *lxc_conf_init(void) new->rootfs.dfd_mnt = -EBADF; new->rootfs.dfd_dev = -EBADF; new->rootfs.dfd_host = -EBADF; + new->rootfs.fd_path_pin = -EBADF; new->logfd = -1; lxc_list_init(&new->cgroup); lxc_list_init(&new->cgroup2); @@ -3490,9 +3509,7 @@ int lxc_setup(struct lxc_handler *handler) return log_error(-1, "Failed to drop capabilities"); } - close_prot_errno_disarm(lxc_conf->rootfs.dfd_mnt) - close_prot_errno_disarm(lxc_conf->rootfs.dfd_dev) - close_prot_errno_disarm(lxc_conf->rootfs.dfd_host) + put_lxc_rootfs(&handler->conf->rootfs, true); NOTICE("The container \"%s\" is set up", name); return 0; @@ -3856,9 +3873,7 @@ void lxc_conf_free(struct lxc_conf *conf) free(conf->rootfs.options); free(conf->rootfs.path); free(conf->rootfs.data); - close_prot_errno_disarm(conf->rootfs.dfd_mnt); - close_prot_errno_disarm(conf->rootfs.dfd_dev); - close_prot_errno_disarm(conf->rootfs.dfd_host); + put_lxc_rootfs(&conf->rootfs, true); free(conf->logfile); if (conf->logfd != -1) close(conf->logfd); diff --git a/src/lxc/conf.h b/src/lxc/conf.h index 489b955cd..bb61b2597 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -196,10 +196,15 @@ struct lxc_tty_info { */ struct lxc_rootfs { int dfd_host; - int dfd_mnt; - int dfd_dev; + char *path; + int fd_path_pin; + + 
int dfd_mnt; char *mount; + + int dfd_dev; + char buf[PATH_MAX]; char *bdev_type; char *options; @@ -481,7 +486,7 @@ extern struct lxc_conf *current_config; __hidden extern int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *argv[]); __hidden extern struct lxc_conf *lxc_conf_init(void); __hidden extern void lxc_conf_free(struct lxc_conf *conf); -__hidden extern int pin_rootfs(const char *rootfs); +__hidden extern int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns); __hidden extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid); __hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf); __hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys); @@ -557,4 +562,15 @@ static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs) return !is_empty_string(rootfs->path) ? rootfs->mount : s; } +static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin) +{ + if (rootfs) { + close_prot_errno_disarm(rootfs->dfd_host); + close_prot_errno_disarm(rootfs->dfd_mnt); + close_prot_errno_disarm(rootfs->dfd_dev); + if (unpin) + close_prot_errno_disarm(rootfs->fd_path_pin); + } +} + #endif /* __LXC_CONF_H */ diff --git a/src/lxc/start.c b/src/lxc/start.c index 019014c0c..90ee8dbfe 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -618,7 +618,6 @@ out_sigfd: void lxc_put_handler(struct lxc_handler *handler) { - close_prot_errno_disarm(handler->pinfd); close_prot_errno_disarm(handler->pidfd); close_prot_errno_disarm(handler->sigfd); lxc_put_nsfds(handler); @@ -660,7 +659,6 @@ struct lxc_handler *lxc_init_handler(struct lxc_handler *old, handler->data_sock[0] = -EBADF; handler->data_sock[1] = -EBADF; handler->monitor_status_fd = -EBADF; - handler->pinfd = -EBADF; handler->pidfd = -EBADF; handler->sigfd = -EBADF; handler->state_socket_pair[0] = -EBADF; @@ -925,6 +923,8 @@ void lxc_end(struct lxc_handler *handler) cgroup_ops->monitor_destroy(cgroup_ops, handler); } + 
put_lxc_rootfs(&handler->conf->rootfs, true); + if (handler->conf->reboot == REBOOT_NONE) { /* For all new state clients simply close the command socket. * This will inform all state clients that the container is @@ -1066,9 +1066,6 @@ static int do_start(void *data) goto out_warn_father; } - /* Don't leak the pinfd to the container. */ - close_prot_errno_disarm(handler->pinfd); - if (!lxc_sync_wait_parent(handler, START_SYNC_STARTUP)) goto out_warn_father; @@ -1666,10 +1663,10 @@ static int lxc_spawn(struct lxc_handler *handler) * it readonly. * If the container is unprivileged then skip rootfs pinning. */ - if (!wants_to_map_ids) { - handler->pinfd = pin_rootfs(conf->rootfs.path); - if (handler->pinfd == -EBADF) - INFO("Failed to pin the rootfs for container \"%s\"", handler->name); + ret = lxc_rootfs_prepare(&conf->rootfs, wants_to_map_ids); + if (ret) { + ERROR("Failed to handle rootfs pinning for container \"%s\"", handler->name); + goto out_delete_net; } /* Create a process in a new set of namespaces. */ @@ -2001,7 +1998,6 @@ out_abort: out_sync_fini: lxc_sync_fini(handler); - close_prot_errno_disarm(handler->pinfd); return -1; } @@ -2118,8 +2114,6 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, if (ret < 0) ERROR("Failed to move physical network devices back to parent network namespace"); - close_prot_errno_disarm(handler->pinfd); - lxc_monitor_send_exit_code(name, status, handler->lxcpath); lxc_error_set_and_log(handler->pid, status); if (error_num) diff --git a/src/lxc/start.h b/src/lxc/start.h index 9d23df955..45c48a650 100644 --- a/src/lxc/start.h +++ b/src/lxc/start.h @@ -43,9 +43,6 @@ struct lxc_handler { __aligned_u64 clone_flags; }; - /* File descriptor to pin the rootfs for privileged containers. */ - int pinfd; - /* Signal file descriptor. */ int sigfd; -- 2.47.2