return run_buffer(buffer);
}
-/* pin_rootfs
- * if rootfs is a directory, then open ${rootfs}/.lxc-keep for writing for
- * the duration of the container run, to prevent the container from marking
- * the underlying fs readonly on shutdown. unlink the file immediately so
- * no name pollution is happens.
- * don't unlink on NFS to avoid random named stale handles.
- * return -1 on error.
- * return -2 if nothing needed to be pinned.
- * return an open fd (>=0) if we pinned it.
+/* lxc_rootfs_prepare
+ * If the rootfs is a directory, create and open ${rootfs}/.lxc_keep and keep
+ * it open for the duration of the container run, to prevent the container
+ * from marking the underlying fs readonly on shutdown. The file is unlinked
+ * immediately so that no name pollution happens.
+ * The file is not unlinked on NFS, to avoid randomly named stale handles,
+ * nor when the filesystem type cannot be retrieved.
+ *
+ * Nothing is pinned and 0 is returned when the rootfs is on a stacking
+ * filesystem (overlay/overlayfs), when the container has no rootfs, when it
+ * runs in a user namespace, or when the rootfs is not a directory.
+ *
+ * On success the pinning fd is stored in rootfs->fd_path_pin and 0 is
+ * returned. Failures are reported through log_error_errno() and
+ * log_trace_errno() with -errno passed as the value to return.
  */
-int pin_rootfs(const char *rootfs)
+int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns)
 {
-	__do_free char *absrootfs = NULL;
-	int fd, ret;
-	char absrootfspin[PATH_MAX];
-	struct stat s;
-	struct statfs sfs;
+	__do_close int dfd_path = -EBADF, fd_pin = -EBADF;
+	int ret;
+	struct stat st;
+	struct statfs stfs;
-	if (rootfs == NULL || strlen(rootfs) == 0)
-		return -2;
+	if (rootfs->path) {
+		if (rootfs->bdev_type &&
+		    (!strcmp(rootfs->bdev_type, "overlay") ||
+		     !strcmp(rootfs->bdev_type, "overlayfs")))
+			return log_trace_errno(0, EINVAL, "Not pinning on stacking filesystem");
-	absrootfs = realpath(rootfs, NULL);
-	if (!absrootfs)
-		return -2;
+		dfd_path = open_at(-EBADF, rootfs->path, PROTECT_OPATH_FILE, 0, 0);
+	} else {
+		dfd_path = open_at(-EBADF, "/", PROTECT_OPATH_FILE, PROTECT_LOOKUP_ABSOLUTE, 0);
+	}
+	/* rootfs->path is NULL when "/" was opened above; never hand NULL to %s. */
+	if (dfd_path < 0)
+		return log_error_errno(-errno, errno, "Failed to open \"%s\"",
+				       rootfs->path ? rootfs->path : "/");
+
+	if (!rootfs->path)
+		return log_trace(0, "Not pinning because container does not have a rootfs");
-	ret = stat(absrootfs, &s);
+	if (userns)
+		return log_trace(0, "Not pinning because container runs in user namespace");
+
+	ret = fstat(dfd_path, &st);
 	if (ret < 0)
-		return -1;
+		return log_trace_errno(-errno, errno, "Failed to retrieve file status");
-	if (!S_ISDIR(s.st_mode))
-		return -2;
+	if (!S_ISDIR(st.st_mode))
+		return log_trace_errno(0, ENOTDIR, "Not pinning because file descriptor is not a directory");
-	ret = snprintf(absrootfspin, sizeof(absrootfspin), "%s/.lxc-keep", absrootfs);
-	if (ret < 0 || (size_t)ret >= sizeof(absrootfspin))
-		return -1;
+	fd_pin = open_at(dfd_path, ".lxc_keep",
+			 PROTECT_OPEN | O_CREAT,
+			 PROTECT_LOOKUP_BENEATH,
+			 S_IWUSR | S_IRUSR);
+	if (fd_pin < 0)
+		return log_error_errno(-errno, errno, "Failed to pin rootfs");
-	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR | S_IRUSR | O_CLOEXEC);
-	if (fd < 0)
-		return fd;
+	TRACE("Pinned rootfs %d(.lxc_keep)", fd_pin);
-	ret = fstatfs (fd, &sfs);
-	if (ret < 0)
-		return fd;
+	/* Without the filesystem type we cannot rule out NFS, so keep the file. */
+	ret = fstatfs(fd_pin, &stfs);
+	if (ret < 0) {
+		SYSWARN("Failed to retrieve filesystem status");
+		goto out;
+	}
-	if (sfs.f_type == NFS_SUPER_MAGIC)
-		return log_debug(fd, "Rootfs on NFS, not unlinking pin file \"%s\"", absrootfspin);
+	if (stfs.f_type == NFS_SUPER_MAGIC) {
+		DEBUG("Not unlinking pinned file on NFS");
+		goto out;
+	}
-	(void)unlink(absrootfspin);
+	/* Best effort: a failed unlink only leaves the pin file name behind. */
+	if (unlinkat(dfd_path, ".lxc_keep", 0))
+		SYSTRACE("Failed to unlink rootfs pinning file %d(.lxc_keep)", dfd_path);
+	else
+		TRACE("Unlinked pinned file %d(.lxc_keep)", dfd_path);
-	return fd;
+out:
+	/* Hand the pin fd over to the rootfs so it is not closed on return. */
+	rootfs->fd_path_pin = move_fd(fd_pin);
+	return 0;
 }
static int add_shmount_to_list(struct lxc_conf *conf)
new->rootfs.dfd_mnt = -EBADF;
new->rootfs.dfd_dev = -EBADF;
new->rootfs.dfd_host = -EBADF;
+ new->rootfs.fd_path_pin = -EBADF;
new->logfd = -1;
lxc_list_init(&new->cgroup);
lxc_list_init(&new->cgroup2);
return log_error(-1, "Failed to drop capabilities");
}
- close_prot_errno_disarm(lxc_conf->rootfs.dfd_mnt)
- close_prot_errno_disarm(lxc_conf->rootfs.dfd_dev)
- close_prot_errno_disarm(lxc_conf->rootfs.dfd_host)
+ put_lxc_rootfs(&handler->conf->rootfs, true);
NOTICE("The container \"%s\" is set up", name);
return 0;
free(conf->rootfs.options);
free(conf->rootfs.path);
free(conf->rootfs.data);
- close_prot_errno_disarm(conf->rootfs.dfd_mnt);
- close_prot_errno_disarm(conf->rootfs.dfd_dev);
- close_prot_errno_disarm(conf->rootfs.dfd_host);
+ put_lxc_rootfs(&conf->rootfs, true);
free(conf->logfile);
if (conf->logfd != -1)
close(conf->logfd);
*/
struct lxc_rootfs {
int dfd_host;
- int dfd_mnt;
- int dfd_dev;
+
char *path;
+ int fd_path_pin;
+
+ int dfd_mnt;
char *mount;
+
+ int dfd_dev;
+
char buf[PATH_MAX];
char *bdev_type;
char *options;
__hidden extern int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *argv[]);
__hidden extern struct lxc_conf *lxc_conf_init(void);
__hidden extern void lxc_conf_free(struct lxc_conf *conf);
-__hidden extern int pin_rootfs(const char *rootfs);
+__hidden extern int lxc_rootfs_prepare(struct lxc_rootfs *rootfs, bool userns);
__hidden extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid);
__hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf);
__hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys);
return !is_empty_string(rootfs->path) ? rootfs->mount : s;
}
+/* put_lxc_rootfs
+ * Hand the dfd_host, dfd_mnt and dfd_dev descriptors of @rootfs to
+ * close_prot_errno_disarm(). The rootfs pin (fd_path_pin) is only released
+ * when @unpin is true. A NULL @rootfs is accepted and ignored.
+ */
+static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin)
+{
+	if (!rootfs)
+		return;
+
+	close_prot_errno_disarm(rootfs->dfd_host);
+	close_prot_errno_disarm(rootfs->dfd_mnt);
+	close_prot_errno_disarm(rootfs->dfd_dev);
+
+	if (!unpin)
+		return;
+
+	close_prot_errno_disarm(rootfs->fd_path_pin);
+}
+
#endif /* __LXC_CONF_H */
void lxc_put_handler(struct lxc_handler *handler)
{
- close_prot_errno_disarm(handler->pinfd);
close_prot_errno_disarm(handler->pidfd);
close_prot_errno_disarm(handler->sigfd);
lxc_put_nsfds(handler);
handler->data_sock[0] = -EBADF;
handler->data_sock[1] = -EBADF;
handler->monitor_status_fd = -EBADF;
- handler->pinfd = -EBADF;
handler->pidfd = -EBADF;
handler->sigfd = -EBADF;
handler->state_socket_pair[0] = -EBADF;
cgroup_ops->monitor_destroy(cgroup_ops, handler);
}
+ put_lxc_rootfs(&handler->conf->rootfs, true);
+
if (handler->conf->reboot == REBOOT_NONE) {
/* For all new state clients simply close the command socket.
* This will inform all state clients that the container is
goto out_warn_father;
}
- /* Don't leak the pinfd to the container. */
- close_prot_errno_disarm(handler->pinfd);
-
if (!lxc_sync_wait_parent(handler, START_SYNC_STARTUP))
goto out_warn_father;
* it readonly.
* If the container is unprivileged then skip rootfs pinning.
*/
- if (!wants_to_map_ids) {
- handler->pinfd = pin_rootfs(conf->rootfs.path);
- if (handler->pinfd == -EBADF)
- INFO("Failed to pin the rootfs for container \"%s\"", handler->name);
+ ret = lxc_rootfs_prepare(&conf->rootfs, wants_to_map_ids);
+ if (ret) {
+ ERROR("Failed to handle rootfs pinning for container \"%s\"", handler->name);
+ goto out_delete_net;
}
/* Create a process in a new set of namespaces. */
out_sync_fini:
lxc_sync_fini(handler);
- close_prot_errno_disarm(handler->pinfd);
return -1;
}
if (ret < 0)
ERROR("Failed to move physical network devices back to parent network namespace");
- close_prot_errno_disarm(handler->pinfd);
-
lxc_monitor_send_exit_code(name, status, handler->lxcpath);
lxc_error_set_and_log(handler->pid, status);
if (error_num)