#include <libgen.h>
#include <linux/loop.h>
#include <dirent.h>
+#include <sys/prctl.h>
#include "lxc.h"
#include "config.h"
.can_snapshot = true,
};
+//
+// nbd dev ops
+//
+
+static int nbd_detect(const char *path)
+{
+ if (strncmp(path, "nbd:", 4) == 0)
+ return 1;
+ return 0;
+}
+
+struct nbd_attach_data {
+ const char *nbd;
+ const char *path;
+};
+
+static void nbd_detach(const char *path)
+{
+ int ret;
+ pid_t pid = fork();
+
+ if (pid < 0) {
+ SYSERROR("Error forking to detach nbd");
+ return;
+ }
+ if (pid) {
+ ret = wait_for_pid(pid);
+ if (ret < 0)
+ ERROR("nbd disconnect returned an error");
+ return;
+ }
+ execlp("qemu-nbd", "qemu-nbd", "-d", path, NULL);
+ SYSERROR("Error executing qemu-nbd");
+ exit(1);
+}
+
+static int do_attach_nbd(void *d)
+{
+ struct nbd_attach_data *data = d;
+ const char *nbd, *path;
+ pid_t pid;
+ sigset_t mask;
+ int sfd;
+ ssize_t s;
+ struct signalfd_siginfo fdsi;
+
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGHUP);
+ sigaddset(&mask, SIGCHLD);
+
+ nbd = data->nbd;
+ path = data->path;
+
+ if (sigprocmask(SIG_BLOCK, &mask, NULL) == -1) {
+ SYSERROR("Error blocking signals for nbd watcher");
+ exit(1);
+ }
+
+ sfd = signalfd(-1, &mask, 0);
+ if (sfd == -1) {
+ SYSERROR("Error opening signalfd for nbd task");
+ exit(1);
+ }
+
+ if (prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0) < 0)
+ SYSERROR("Error setting parent death signal for nbd watcher");
+
+ pid = fork();
+ if (pid) {
+ for (;;) {
+ s = read(sfd, &fdsi, sizeof(struct signalfd_siginfo));
+ if (s != sizeof(struct signalfd_siginfo))
+ SYSERROR("Error reading from signalfd");
+
+ if (fdsi.ssi_signo == SIGHUP) {
+ /* container has exited */
+ nbd_detach(nbd);
+ exit(0);
+ } else if (fdsi.ssi_signo == SIGCHLD) {
+ int status;
+ while (waitpid(-1, &status, WNOHANG) > 0);
+ }
+ }
+ }
+
+ close(sfd);
+ if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1)
+ WARN("Warning: unblocking signals for nbd watcher");
+
+ execlp("qemu-nbd", "qemu-nbd", "-c", nbd, path, NULL);
+ SYSERROR("Error executing qemu-nbd");
+ exit(1);
+}
+
+static bool clone_attach_nbd(const char *nbd, const char *path)
+{
+ pid_t pid;
+ struct nbd_attach_data data;
+
+ data.nbd = nbd;
+ data.path = path;
+
+ pid = lxc_clone(do_attach_nbd, &data, CLONE_NEWPID);
+ if (pid < 0)
+ return false;
+ return true;
+}
+
+static bool nbd_busy(int idx)
+{
+ char path[100];
+ int ret;
+
+ ret = snprintf(path, 100, "/sys/block/nbd%d/pid", idx);
+ if (ret < 0 || ret >= 100)
+ return true;
+ return file_exists(path);
+}
+
+static bool attach_nbd(char *src, struct lxc_conf *conf)
+{
+ char *orig = alloca(strlen(src)+1), *p, path[50];
+ int i = 0;
+
+ strcpy(orig, src);
+ /* if path is followed by a partition, drop that for now */
+ p = strchr(orig, ':');
+ if (p)
+ *p = '\0';
+ while (1) {
+ sprintf(path, "/dev/nbd%d", i);
+ if (!file_exists(path))
+ return false;
+ if (nbd_busy(i)) {
+ i++;
+ continue;
+ }
+ if (!clone_attach_nbd(path, orig))
+ return false;
+ conf->nbd_idx = i;
+ return true;
+ }
+}
+
+static bool requires_nbd(const char *path)
+{
+ if (strncmp(path, "nbd:", 4) == 0)
+ return true;
+ return false;
+}
+
+/*
+ * attach_block_device returns true if all went well,
+ * meaning either a block device was attached or was not
+ * needed. It returns false if something went wrong and
+ * container startup shoudl be stopped.
+ */
+bool attach_block_device(struct lxc_conf *conf)
+{
+ char *path;
+
+ if (!conf->rootfs.path)
+ return true;
+ path = conf->rootfs.path;
+ if (!requires_nbd(path))
+ return true;
+ path = strchr(path, ':');
+ if (!path)
+ return false;
+ path++;
+ if (!attach_nbd(path, conf))
+ return false;
+ return true;
+}
+
+void detach_nbd_idx(int idx)
+{
+ int ret;
+ char path[50];
+
+ ret = snprintf(path, 50, "/dev/nbd%d", idx);
+ if (ret < 0 || ret >= 50)
+ return;
+
+ nbd_detach(path);
+}
+
+void detach_block_device(struct lxc_conf *conf)
+{
+ if (conf->nbd_idx != -1)
+ detach_nbd_idx(conf->nbd_idx);
+}
+
+/*
+ * Pick the partition # off the end of a nbd:file:p
+ * description. Return 1-9 for the partition id, or 0
+ * for no partition.
+ */
+static int nbd_get_partition(const char *src)
+{
+ char *p = strchr(src, ':');
+ if (!p)
+ return 0;
+ p = strchr(p+1, ':');
+ if (!p)
+ return 0;
+ p++;
+ if (*p < '1' && *p > '9')
+ return 0;
+ return *p - '0';
+}
+
+static int nbd_mount(struct bdev *bdev)
+{
+ int ret = -1, partition;
+ char path[50];
+
+ if (strcmp(bdev->type, "nbd"))
+ return -22;
+ if (!bdev->src || !bdev->dest)
+ return -22;
+
+ /* nbd_idx should have been copied by bdev_init from the lxc_conf */
+ if (bdev->nbd_idx < 0)
+ return -22;
+ partition = nbd_get_partition(bdev->src);
+ if (partition)
+ ret = snprintf(path, 50, "/dev/nbd%dp%d", bdev->nbd_idx,
+ partition);
+ else
+ ret = snprintf(path, 50, "/dev/nbd%d", bdev->nbd_idx);
+ if (ret < 0 || ret >= 50) {
+ ERROR("Error setting up nbd device path");
+ return ret;
+ }
+ ret = mount_unknown_fs(path, bdev->dest, bdev->mntopts);
+ if (ret < 0)
+ ERROR("Error mounting %s", bdev->src);
+
+ return ret;
+}
+
+static int nbd_create(struct bdev *bdev, const char *dest, const char *n,
+ struct bdev_specs *specs)
+{
+ return -ENOSYS;
+}
+
+static int nbd_clonepaths(struct bdev *orig, struct bdev *new, const char *oldname,
+ const char *cname, const char *oldpath, const char *lxcpath, int snap,
+ uint64_t newsize, struct lxc_conf *conf)
+{
+ return -ENOSYS;
+}
+
+static int nbd_destroy(struct bdev *orig)
+{
+ return -ENOSYS;
+}
+
+static int nbd_umount(struct bdev *bdev)
+{
+ int ret;
+
+ if (strcmp(bdev->type, "nbd"))
+ return -22;
+ if (!bdev->src || !bdev->dest)
+ return -22;
+ ret = umount(bdev->dest);
+ return ret;
+}
+
+static const struct bdev_ops nbd_ops = {
+ .detect = &nbd_detect,
+ .mount = &nbd_mount,
+ .umount = &nbd_umount,
+ .clone_paths = &nbd_clonepaths,
+ .destroy = &nbd_destroy,
+ .create = &nbd_create,
+ .can_snapshot = true,
+};
static const struct bdev_type bdevs[] = {
{.name = "zfs", .ops = &zfs_ops,},
{.name = "aufs", .ops = &aufs_ops,},
{.name = "overlayfs", .ops = &overlayfs_ops,},
{.name = "loop", .ops = &loop_ops,},
+ {.name = "nbd", .ops = &nbd_ops,},
};
static const size_t numbdevs = sizeof(bdevs) / sizeof(struct bdev_type);
return bdev;
}
-struct bdev *bdev_init(const char *src, const char *dst, const char *mntopts)
+struct bdev *bdev_init(struct lxc_conf *conf, const char *src, const char *dst, const char *mntopts)
{
int i;
struct bdev *bdev;
bdev->src = strdup(src);
if (dst)
bdev->dest = strdup(dst);
+ if (strcmp(bdev->type, "nbd") == 0)
+ bdev->nbd_idx = conf->nbd_idx;
return bdev;
}
return rsync_rootfs(arg);
}
-bool bdev_is_dir(const char *path)
+bool bdev_is_dir(struct lxc_conf *conf, const char *path)
{
- struct bdev *orig = bdev_init(path, NULL, NULL);
+ struct bdev *orig = bdev_init(conf, path, NULL, NULL);
bool ret = false;
if (!orig)
return ret;
return NULL;
}
- orig = bdev_init(src, NULL, NULL);
+ orig = bdev_init(c0->lxc_conf, src, NULL, NULL);
if (!orig) {
ERROR("failed to detect blockdev type for %s", src);
return NULL;
#ifndef __LXC_BDEV_H
#define __LXC_BDEV_H
/* blockdev operations for:
- * aufs, dir, raw, btrfs, overlayfs, aufs, lvm, loop, zfs
- * someday: qemu-nbd, qcow2, qed
+ * aufs, dir, raw, btrfs, overlayfs, aufs, lvm, loop, zfs, nbd (qcow2, raw, vdi, qed)
*/
#include "config.h"
// turn the following into a union if need be
// lofd is the open fd for the mounted loopback file
int lofd;
+ // index for the connected nbd device
+ int nbd_idx;
};
char *overlay_getlower(char *p);
-bool bdev_is_dir(const char *path);
+bool bdev_is_dir(struct lxc_conf *conf, const char *path);
/*
* Instantiate a bdev object. The src is used to determine which blockdev
* use /var/lib/lxc/canonical/rootfs as lower dir, and /var/lib/lxc/c1/delta
* as the upper, writeable layer.
*/
-struct bdev *bdev_init(const char *src, const char *dst, const char *data);
+struct bdev *bdev_init(struct lxc_conf *conf, const char *src, const char *dst,
+ const char *data);
struct bdev *bdev_copy(struct lxc_container *c0, const char *cname,
const char *lxcpath, const char *bdevtype,
const char *cname, struct bdev_specs *specs);
void bdev_put(struct bdev *bdev);
+/*
+ * these are really for qemu-nbd support, as container shutdown
+ * must explicitly request device detach.
+ */
+bool attach_block_device(struct lxc_conf *conf);
+void detach_block_device(struct lxc_conf *conf);
+
/* define constants if the kernel/glibc headers don't define them */
#ifndef MS_DIRSYNC
#define MS_DIRSYNC 128
lxc_log_define(lxc_container, lxc);
-static bool file_exists(const char *f)
-{
- struct stat statbuf;
-
- return stat(f, &statbuf) == 0;
-}
-
static bool config_file_exists(const char *lxcpath, const char *cname)
{
/* $lxcpath + '/' + $cname + '/config' + \0 */
if (strncmp(src, "aufs:", 5) == 0)
src = overlay_getlower(src+5);
- bdev = bdev_init(src, c->lxc_conf->rootfs.mount, NULL);
+ bdev = bdev_init(c->lxc_conf, src, c->lxc_conf->rootfs.mount, NULL);
if (!bdev) {
ERROR("Error opening rootfs");
exit(1);
struct bdev *r;
int ret = 0;
- r = bdev_init(conf->rootfs.path, conf->rootfs.mount, NULL);
+ r = bdev_init(conf, conf->rootfs.path, conf->rootfs.mount, NULL);
if (!r)
return -1;
if (unshare(CLONE_NEWNS) < 0)
return -1;
- bdev = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
+ bdev = bdev_init(c->lxc_conf, c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
if (!bdev)
return -1;
if (strcmp(bdev->type, "dir") != 0) {
if (!c || !c->name || !c->config_path || !c->lxc_conf)
return false;
- bdev = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
+ bdev = bdev_init(c->lxc_conf, c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
if (!bdev) {
ERROR("Failed to find original backing store type");
return false;
*/
flags = LXC_CLONE_SNAPSHOT | LXC_CLONE_KEEPMACADDR | LXC_CLONE_KEEPNAME |
LXC_CLONE_KEEPBDEVTYPE | LXC_CLONE_MAYBE_SNAPSHOT;
- if (bdev_is_dir(c->lxc_conf->rootfs.path)) {
+ if (bdev_is_dir(c->lxc_conf, c->lxc_conf->rootfs.path)) {
ERROR("Snapshot of directory-backed container requested.");
ERROR("Making a copy-clone. If you do want snapshots, then");
ERROR("please create an aufs or overlayfs clone first, snapshot that");
if (!c || !c->name || !c->config_path)
return false;
- bdev = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
+ bdev = bdev_init(c->lxc_conf, c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
if (!bdev) {
ERROR("Failed to find original backing store type");
return false;