From: Christian Brauner <christian.brauner@mailbox.org>
Date: Wed, 30 Dec 2015 16:34:08 +0000 (+0100)
Subject: Cleanup bdev.c after splitting into modules
X-Git-Tag: lxc-2.0.0.beta2~65^2
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cdb4e53a7df61f5f3c4a4b03ae5990cb4c86263e;p=thirdparty%2Flxc.git

Cleanup bdev.c after splitting into modules

The function

	- bdev_get();

becomes static. So far it is not called from anywhere else, and it has never
appeared in any header.

Minor changes

	- Avoid comparisons between int and size_t types. Use size_t where
	  possible; otherwise cast to size_t when it makes sense.
	- Insert missing spaces around operators.
	- Put declarations for all static functions at the top.

Signed-off-by: Christian Brauner <christian.brauner@mailbox.org>
---

diff --git a/src/lxc/bdev/bdev.c b/src/lxc/bdev/bdev.c
index 96b0a5967..14f4a9e76 100644
--- a/src/lxc/bdev/bdev.c
+++ b/src/lxc/bdev/bdev.c
@@ -189,12 +189,30 @@ static const struct bdev_ops zfs_ops = {
 	.can_backup = true,
 };
 
+struct bdev_type {
+	const char *name;
+	const struct bdev_ops *ops;
+};
+
+static const struct bdev_type bdevs[] = {
+	{.name = "zfs", .ops = &zfs_ops,},
+	{.name = "lvm", .ops = &lvm_ops,},
+	{.name = "rbd", .ops = &rbd_ops,},
+	{.name = "btrfs", .ops = &btrfs_ops,},
+	{.name = "dir", .ops = &dir_ops,},
+	{.name = "aufs", .ops = &aufs_ops,},
+	{.name = "overlayfs", .ops = &ovl_ops,},
+	{.name = "loop", .ops = &loop_ops,},
+	{.name = "nbd", .ops = &nbd_ops,},
+};
+
+static const size_t numbdevs = sizeof(bdevs) / sizeof(struct bdev_type);
+
 /* helpers */
-/*
- * These are copied from conf.c.  However as conf.c will be moved to using
- * the callback system, they can be pulled from there eventually, so we
- * don't need to pollute utils.c with these low level functions
- */
+static const struct bdev_type *bdev_query(const char *src);
+static struct bdev *bdev_get(const char *type);
+static struct bdev *do_bdev_create(const char *dest, const char *type,
+		const char *cname, struct bdev_specs *specs);
 static int find_fstype_cb(char *buffer, void *data);
 static char *linkderef(char *path, char *dest);
 static bool unpriv_snap_allowed(struct bdev *b, const char *t, bool snap,
@@ -233,8 +251,8 @@ char *dir_new_path(char *src, const char *oldname, const char *name,
 	}
 
 	while ((p2 = strstr(src, oldname)) != NULL) {
-		strncpy(p, src, p2-src); // copy text up to oldname
-		p += p2-src; // move target pointer (p)
+		strncpy(p, src, p2 - src); // copy text up to oldname
+		p += p2 - src; // move target pointer (p)
 		p += sprintf(p, "%s", name); // print new name in place of oldname
 		src = p2 + l2;  // move src to end of oldname
 	}
@@ -243,156 +261,361 @@ char *dir_new_path(char *src, const char *oldname, const char *name,
 }
 
 /*
- * return block size of dev->src in units of bytes
+ * attach_block_device returns true if all went well,
+ * meaning either a block device was attached or was not
+ * needed.  It returns false if something went wrong and
+ * container startup should be stopped.
  */
-int blk_getsize(struct bdev *bdev, uint64_t *size)
+bool attach_block_device(struct lxc_conf *conf)
 {
-	int fd, ret;
-	char *path = bdev->src;
+	char *path;
 
-	if (strcmp(bdev->type, "loop") == 0)
-		path = bdev->src + 5;
+	if (!conf->rootfs.path)
+		return true;
+	path = conf->rootfs.path;
+	if (!requires_nbd(path))
+		return true;
+	path = strchr(path, ':');
+	if (!path)
+		return false;
+	path++;
+	if (!attach_nbd(path, conf))
+		return false;
+	return true;
+}
 
-	fd = open(path, O_RDONLY);
-	if (fd < 0)
-		return -1;
+bool bdev_can_backup(struct lxc_conf *conf)
+{
+	struct bdev *bdev = bdev_init(conf, NULL, NULL, NULL);
+	bool ret;
 
-	ret = ioctl(fd, BLKGETSIZE64, size); // size of device in bytes
-	close(fd);
+	if (!bdev)
+		return false;
+	ret = bdev->ops->can_backup;
+	bdev_put(bdev);
 	return ret;
 }
 
 /*
- * These are copied from conf.c.  However as conf.c will be moved to using
- * the callback system, they can be pulled from there eventually, so we
- * don't need to pollute utils.c with these low level functions
+ * If we're not snapshotting, then bdev_copy becomes a simple case of mounting
+ * the original, mounting the new, and rsyncing the contents.
  */
-static int find_fstype_cb(char* buffer, void *data)
+struct bdev *bdev_copy(struct lxc_container *c0, const char *cname,
+		const char *lxcpath, const char *bdevtype, int flags,
+		const char *bdevdata, uint64_t newsize, int *needs_rdep)
 {
-	struct cbarg {
-		const char *rootfs;
-		const char *target;
-		const char *options;
-	} *cbarg = data;
+	struct bdev *orig, *new;
+	pid_t pid;
+	int ret;
+	bool snap = flags & LXC_CLONE_SNAPSHOT;
+	bool maybe_snap = flags & LXC_CLONE_MAYBE_SNAPSHOT;
+	bool keepbdevtype = flags & LXC_CLONE_KEEPBDEVTYPE;
+	const char *src = c0->lxc_conf->rootfs.path;
+	const char *oldname = c0->name;
+	const char *oldpath = c0->config_path;
+	struct rsync_data data;
 
-	unsigned long mntflags;
-	char *mntdata;
-	char *fstype;
+	/* if the container name doesn't show up in the rootfs path, then
+	 * we don't know how to come up with a new name
+	 */
+	if (strstr(src, oldname) == NULL) {
+		ERROR("original rootfs path %s doesn't include container name %s",
+			src, oldname);
+		return NULL;
+	}
 
-	/* we don't try 'nodev' entries */
-	if (strstr(buffer, "nodev"))
-		return 0;
+	orig = bdev_init(c0->lxc_conf, src, NULL, NULL);
+	if (!orig) {
+		ERROR("failed to detect blockdev type for %s", src);
+		return NULL;
+	}
 
-	fstype = buffer;
-	fstype += lxc_char_left_gc(fstype, strlen(fstype));
-	fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';
+	if (!orig->dest) {
+		int ret;
+		size_t len;
+		struct stat sb;
 
-	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
-	      cbarg->rootfs, cbarg->target, fstype);
+		len = strlen(oldpath) + strlen(oldname) + strlen("/rootfs") + 2;
+		orig->dest = malloc(len);
+		if (!orig->dest) {
+			ERROR("out of memory");
+			bdev_put(orig);
+			return NULL;
+		}
+		ret = snprintf(orig->dest, len, "%s/%s/rootfs", oldpath, oldname);
+		if (ret < 0 || (size_t)ret >= len) {
+			ERROR("rootfs path too long");
+			bdev_put(orig);
+			return NULL;
+		}
+		ret = stat(orig->dest, &sb);
+		if (ret < 0 && errno == ENOENT)
+			if (mkdir_p(orig->dest, 0755) < 0)
+				WARN("Error creating '%s', continuing.", orig->dest);
+	}
 
-	if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
-		free(mntdata);
-		return 0;
+	/*
+	 * special case for snapshot - if caller requested maybe_snapshot and
+	 * keepbdevtype and backing store is directory, then proceed with a copy
+	 * clone rather than returning error
+	 */
+	if (maybe_snap && keepbdevtype && !bdevtype && !orig->ops->can_snapshot)
+		snap = false;
+
+	/*
+	 * If newtype is NULL and snapshot is set, then use overlayfs
+	 */
+	if (!bdevtype && !keepbdevtype && snap && strcmp(orig->type , "dir") == 0)
+		bdevtype = "overlayfs";
+
+	if (am_unpriv() && !unpriv_snap_allowed(orig, bdevtype, snap, maybe_snap)) {
+		ERROR("Unsupported snapshot type for unprivileged users");
+		bdev_put(orig);
+		return NULL;
 	}
 
-	if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
-		DEBUG("mount failed with error: %s", strerror(errno));
-		free(mntdata);
-		return 0;
+	*needs_rdep = 0;
+	if (bdevtype && strcmp(orig->type, "dir") == 0 &&
+			(strcmp(bdevtype, "aufs") == 0 ||
+			 strcmp(bdevtype, "overlayfs") == 0)) {
+		*needs_rdep = 1;
+	} else if (snap && strcmp(orig->type, "lvm") == 0 &&
+			!lvm_is_thin_volume(orig->src)) {
+		*needs_rdep = 1;
 	}
 
-	free(mntdata);
+	new = bdev_get(bdevtype ? bdevtype : orig->type);
+	if (!new) {
+		ERROR("no such block device type: %s", bdevtype ? bdevtype : orig->type);
+		bdev_put(orig);
+		return NULL;
+	}
 
-	INFO("mounted '%s' on '%s', with fstype '%s'",
-	     cbarg->rootfs, cbarg->target, fstype);
+	if (new->ops->clone_paths(orig, new, oldname, cname, oldpath, lxcpath,
+				snap, newsize, c0->lxc_conf) < 0) {
+		ERROR("failed getting pathnames for cloned storage: %s", src);
+		goto err;
+	}
 
-	return 1;
-}
+	if (am_unpriv() && chown_mapped_root(new->src, c0->lxc_conf) < 0)
+		WARN("Failed to update ownership of %s", new->dest);
 
-int mount_unknown_fs(const char *rootfs, const char *target,
-		const char *options)
-{
-	struct cbarg {
-		const char *rootfs;
-		const char *target;
-		const char *options;
-	} cbarg = {
-		.rootfs = rootfs,
-		.target = target,
-		.options = options,
-	};
+	if (snap)
+		return new;
 
 	/*
-	 * find the filesystem type with brute force:
-	 * first we check with /etc/filesystems, in case the modules
-	 * are auto-loaded and fall back to the supported kernel fs
+	 * https://github.com/lxc/lxc/issues/131
+	 * Use btrfs snapshot feature instead of rsync to restore if both orig and new are btrfs
 	 */
-	char *fsfile[] = {
-		"/etc/filesystems",
-		"/proc/filesystems",
-	};
-
-	size_t i;
-	for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
-
-		int ret;
+	if (bdevtype &&
+			strcmp(orig->type, "btrfs") == 0 && strcmp(new->type, "btrfs") == 0 &&
+			btrfs_same_fs(orig->dest, new->dest) == 0) {
+		if (btrfs_destroy(new) < 0) {
+			ERROR("Error destroying %s subvolume", new->dest);
+			goto err;
+		}
+		if (mkdir_p(new->dest, 0755) < 0) {
+			ERROR("Error creating %s directory", new->dest);
+			goto err;
+		}
+		if (btrfs_snapshot(orig->dest, new->dest) < 0) {
+			ERROR("Error restoring %s to %s", orig->dest, new->dest);
+			goto err;
+		}
+		bdev_put(orig);
+		return new;
+	}
 
-		if (access(fsfile[i], F_OK))
-			continue;
+	pid = fork();
+	if (pid < 0) {
+		SYSERROR("fork");
+		goto err;
+	}
 
-		ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
+	if (pid > 0) {
+		int ret = wait_for_pid(pid);
+		bdev_put(orig);
 		if (ret < 0) {
-			ERROR("failed to parse '%s'", fsfile[i]);
-			return -1;
+			bdev_put(new);
+			return NULL;
 		}
-
-		if (ret)
-			return 0;
+		return new;
 	}
 
-	ERROR("failed to determine fs type for '%s'", rootfs);
-	return -1;
-}
+	data.orig = orig;
+	data.new = new;
+	if (am_unpriv())
+		ret = userns_exec_1(c0->lxc_conf, rsync_rootfs_wrapper, &data);
+	else
+		ret = rsync_rootfs(&data);
 
-int do_mkfs(const char *path, const char *fstype)
+	exit(ret == 0 ? 0 : 1);
+
+err:
+	bdev_put(orig);
+	bdev_put(new);
+	return NULL;
+}
+
+/*
+ * bdev_create:
+ * Create a backing store for a container.
+ * If successful, return a struct bdev *, with the bdev mounted and ready
+ * for use.  Before completing, the caller will need to call the
+ * umount operation and bdev_put().
+ * @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs)
+ * @type: the bdevtype (dir, btrfs, zfs, rbd, etc)
+ * @cname: the container name
+ * @specs: details about the backing store to create, like fstype
+ */
+struct bdev *bdev_create(const char *dest, const char *type, const char *cname,
+		struct bdev_specs *specs)
 {
-	pid_t pid;
+	struct bdev *bdev;
+	char *best_options[] = {"btrfs", "zfs", "lvm", "dir", "rbd", NULL};
 
-	if ((pid = fork()) < 0) {
-		ERROR("error forking");
-		return -1;
+	if (!type)
+		return do_bdev_create(dest, "dir", cname, specs);
+
+	if (strcmp(type, "best") == 0) {
+		int i;
+		// try for the best backing store type, according to our
+		// opinionated preferences
+		for (i = 0; best_options[i]; i++) {
+			if ((bdev = do_bdev_create(dest, best_options[i], cname, specs)))
+				return bdev;
+		}
+		return NULL;  // 'dir' should never fail, so this shouldn't happen
 	}
-	if (pid > 0)
-		return wait_for_pid(pid);
 
-	// If the file is not a block device, we don't want mkfs to ask
-	// us about whether to proceed.
-	if (null_stdfds() < 0)
-		exit(1);
-	execlp("mkfs", "mkfs", "-t", fstype, path, NULL);
-	exit(1);
+	// -B lvm,dir
+	if (strchr(type, ',') != NULL) {
+		char *dup = alloca(strlen(type) + 1), *saveptr = NULL, *token;
+		strcpy(dup, type);
+		for (token = strtok_r(dup, ",", &saveptr); token;
+				token = strtok_r(NULL, ",", &saveptr)) {
+			if ((bdev = do_bdev_create(dest, token, cname, specs)))
+				return bdev;
+		}
+	}
+
+	return do_bdev_create(dest, type, cname, specs);
 }
 
-static char *linkderef(char *path, char *dest)
+bool bdev_destroy(struct lxc_conf *conf)
 {
-	struct stat sbuf;
-	ssize_t ret;
+	struct bdev *r;
+	bool ret = false;
 
-	ret = stat(path, &sbuf);
-	if (ret < 0)
+	r = bdev_init(conf, conf->rootfs.path, conf->rootfs.mount, NULL);
+	if (!r)
+		return ret;
+
+	if (r->ops->destroy(r) == 0)
+		ret = true;
+	bdev_put(r);
+
+	return ret;
+}
+
+int bdev_destroy_wrapper(void *data)
+{
+	struct lxc_conf *conf = data;
+
+	if (setgid(0) < 0) {
+		ERROR("Failed to setgid to 0");
+		return -1;
+	}
+	if (setgroups(0, NULL) < 0)
+		WARN("Failed to clear groups");
+	if (setuid(0) < 0) {
+		ERROR("Failed to setuid to 0");
+		return -1;
+	}
+	if (!bdev_destroy(conf))
+		return -1;
+	else
+		return 0;
+}
+
+struct bdev *bdev_init(struct lxc_conf *conf, const char *src, const char *dst,
+		const char *mntopts)
+{
+	struct bdev *bdev;
+	const struct bdev_type *q;
+
+	if (!src)
+		src = conf->rootfs.path;
+
+	if (!src)
 		return NULL;
-	if (!S_ISLNK(sbuf.st_mode))
-		return path;
-	ret = readlink(path, dest, MAXPATHLEN);
-	if (ret < 0) {
-		SYSERROR("error reading link %s", path);
+
+	q = bdev_query(src);
+	if (!q)
 		return NULL;
-	} else if (ret >= MAXPATHLEN) {
-		ERROR("link in %s too long", path);
+
+	bdev = malloc(sizeof(struct bdev));
+	if (!bdev)
 		return NULL;
-	}
-	dest[ret] = '\0';
-	return dest;
+	memset(bdev, 0, sizeof(struct bdev));
+	bdev->ops = q->ops;
+	bdev->type = q->name;
+	if (mntopts)
+		bdev->mntopts = strdup(mntopts);
+	if (src)
+		bdev->src = strdup(src);
+	if (dst)
+		bdev->dest = strdup(dst);
+	if (strcmp(bdev->type, "nbd") == 0)
+		bdev->nbd_idx = conf->nbd_idx;
+
+	return bdev;
+}
+
+bool bdev_is_dir(struct lxc_conf *conf, const char *path)
+{
+	struct bdev *orig = bdev_init(conf, path, NULL, NULL);
+	bool ret = false;
+	if (!orig)
+		return ret;
+	if (strcmp(orig->type, "dir") == 0)
+		ret = true;
+	bdev_put(orig);
+	return ret;
+}
+
+void bdev_put(struct bdev *bdev)
+{
+	free(bdev->mntopts);
+	free(bdev->src);
+	free(bdev->dest);
+	free(bdev);
+}
+
+/*
+ * return block size of dev->src in units of bytes
+ */
+int blk_getsize(struct bdev *bdev, uint64_t *size)
+{
+	int fd, ret;
+	char *path = bdev->src;
+
+	if (strcmp(bdev->type, "loop") == 0)
+		path = bdev->src + 5;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	ret = ioctl(fd, BLKGETSIZE64, size); // size of device in bytes
+	close(fd);
+	return ret;
+}
+
+void detach_block_device(struct lxc_conf *conf)
+{
+	if (conf->nbd_idx != -1)
+		detach_nbd_idx(conf->nbd_idx);
 }
 
 /*
@@ -428,7 +651,7 @@ int detect_fs(struct bdev *bdev, char *type, int len)
 		int status;
 		close(p[1]);
 		memset(type, 0, len);
-		ret = read(p[0], type, len-1);
+		ret = read(p[0], type, len - 1);
 		close(p[0]);
 		if (ret < 0) {
 			SYSERROR("error reading from pipe");
@@ -440,7 +663,7 @@ int detect_fs(struct bdev *bdev, char *type, int len)
 			return -1;
 		}
 		wait(&status);
-		type[len-1] = '\0';
+		type[len - 1] = '\0';
 		INFO("detected fstype %s for %s", type, srcdev);
 		return ret;
 	}
@@ -475,11 +698,11 @@ int detect_fs(struct bdev *bdev, char *type, int len)
 		*sp1 = '\0';
 		if (strcmp(line, l))
 			continue;
-		sp2 = strchr(sp1+1, ' ');
+		sp2 = strchr(sp1 + 1, ' ');
 		if (!sp2)
 			exit(1);
 		*sp2 = '\0';
-		sp3 = strchr(sp2+1, ' ');
+		sp3 = strchr(sp2 + 1, ' ');
 		if (!sp3)
 			exit(1);
 		*sp3 = '\0';
@@ -491,13 +714,29 @@ int detect_fs(struct bdev *bdev, char *type, int len)
 	exit(1);
 }
 
-struct bdev_type {
-	const char *name;
-	const struct bdev_ops *ops;
-};
+int do_mkfs(const char *path, const char *fstype)
+{
+	pid_t pid;
+
+	if ((pid = fork()) < 0) {
+		ERROR("error forking");
+		return -1;
+	}
+	if (pid > 0)
+		return wait_for_pid(pid);
+
+	// If the file is not a block device, we don't want mkfs to ask
+	// us about whether to proceed.
+	if (null_stdfds() < 0)
+		exit(1);
+	execlp("mkfs", "mkfs", "-t", fstype, path, NULL);
+	exit(1);
+}
 
-// this will return 1 for physical disks, qemu-nbd, loop, etc
-// right now only lvm is a block device
+/*
+ * This will return 1 for physical disks, qemu-nbd, loop, etc. Right now only
+ * lvm is a block device.
+ */
 int is_blktype(struct bdev *b)
 {
 	if (strcmp(b->type, "lvm") == 0)
@@ -505,39 +744,102 @@ int is_blktype(struct bdev *b)
 	return 0;
 }
 
-static const struct bdev_type bdevs[] = {
-	{.name = "zfs", .ops = &zfs_ops,},
-	{.name = "lvm", .ops = &lvm_ops,},
-	{.name = "rbd", .ops = &rbd_ops,},
-	{.name = "btrfs", .ops = &btrfs_ops,},
-	{.name = "dir", .ops = &dir_ops,},
-	{.name = "aufs", .ops = &aufs_ops,},
-	{.name = "overlayfs", .ops = &ovl_ops,},
-	{.name = "loop", .ops = &loop_ops,},
-	{.name = "nbd", .ops = &nbd_ops,},
-};
+int mount_unknown_fs(const char *rootfs, const char *target,
+		const char *options)
+{
+	struct cbarg {
+		const char *rootfs;
+		const char *target;
+		const char *options;
+	} cbarg = {
+		.rootfs = rootfs,
+		.target = target,
+		.options = options,
+	};
 
-static const size_t numbdevs = sizeof(bdevs) / sizeof(struct bdev_type);
+	/*
+	 * find the filesystem type with brute force:
+	 * first we check with /etc/filesystems, in case the modules
+	 * are auto-loaded and fall back to the supported kernel fs
+	 */
+	char *fsfile[] = {
+		"/etc/filesystems",
+		"/proc/filesystems",
+	};
 
-void bdev_put(struct bdev *bdev)
-{
-	free(bdev->mntopts);
-	free(bdev->src);
-	free(bdev->dest);
-	free(bdev);
-}
+	size_t i;
+	for (i = 0; i < sizeof(fsfile) / sizeof(fsfile[0]); i++) {
 
-struct bdev *bdev_get(const char *type)
-{
-	int i;
-	struct bdev *bdev;
+		int ret;
 
-	for (i=0; i<numbdevs; i++) {
-		if (strcmp(bdevs[i].name, type) == 0)
-			break;
-	}
-	if (i == numbdevs)
-		return NULL;
+		if (access(fsfile[i], F_OK))
+			continue;
+
+		ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
+		if (ret < 0) {
+			ERROR("failed to parse '%s'", fsfile[i]);
+			return -1;
+		}
+
+		if (ret)
+			return 0;
+	}
+
+	ERROR("failed to determine fs type for '%s'", rootfs);
+	return -1;
+}
+
+bool rootfs_is_blockdev(struct lxc_conf *conf)
+{
+	const struct bdev_type *q;
+	struct stat st;
+	int ret;
+
+	if (!conf->rootfs.path || strcmp(conf->rootfs.path, "/") == 0 ||
+		strlen(conf->rootfs.path) == 0)
+		return false;
+
+	ret = stat(conf->rootfs.path, &st);
+	if (ret == 0 && S_ISBLK(st.st_mode))
+		return true;
+	q = bdev_query(conf->rootfs.path);
+	if (!q)
+		return false;
+	if (strcmp(q->name, "lvm") == 0 ||
+		strcmp(q->name, "loop") == 0 ||
+		strcmp(q->name, "nbd") == 0)
+		return true;
+	return false;
+}
+
+static struct bdev *do_bdev_create(const char *dest, const char *type,
+		const char *cname, struct bdev_specs *specs)
+{
+
+	struct bdev *bdev = bdev_get(type);
+	if (!bdev) {
+		return NULL;
+	}
+
+	if (bdev->ops->create(bdev, dest, cname, specs) < 0) {
+		 bdev_put(bdev);
+		 return NULL;
+	}
+
+	return bdev;
+}
+
+static struct bdev *bdev_get(const char *type)
+{
+	int i;
+	struct bdev *bdev;
+
+	for (i = 0; i < numbdevs; i++) {
+		if (strcmp(bdevs[i].name, type) == 0)
+			break;
+	}
+	if (i == numbdevs)
+		return NULL;
 	bdev = malloc(sizeof(struct bdev));
 	if (!bdev)
 		return NULL;
@@ -550,7 +852,7 @@ struct bdev *bdev_get(const char *type)
 static const struct bdev_type *bdev_query(const char *src)
 {
 	int i;
-	for (i=0; i<numbdevs; i++) {
+	for (i = 0; i < numbdevs; i++) {
 		int r;
 		r = bdevs[i].ops->detect(src);
 		if (r)
@@ -562,62 +864,73 @@ static const struct bdev_type *bdev_query(const char *src)
 	return &bdevs[i];
 }
 
-struct bdev *bdev_init(struct lxc_conf *conf, const char *src, const char *dst,
-		const char *mntopts)
+/*
+ * These are copied from conf.c.  However as conf.c will be moved to using
+ * the callback system, they can be pulled from there eventually, so we
+ * don't need to pollute utils.c with these low level functions
+ */
+static int find_fstype_cb(char* buffer, void *data)
 {
-	struct bdev *bdev;
-	const struct bdev_type *q;
+	struct cbarg {
+		const char *rootfs;
+		const char *target;
+		const char *options;
+	} *cbarg = data;
 
-	if (!src)
-		src = conf->rootfs.path;
+	unsigned long mntflags;
+	char *mntdata;
+	char *fstype;
 
-	if (!src)
-		return NULL;
+	/* we don't try 'nodev' entries */
+	if (strstr(buffer, "nodev"))
+		return 0;
 
-	q = bdev_query(src);
-	if (!q)
-		return NULL;
+	fstype = buffer;
+	fstype += lxc_char_left_gc(fstype, strlen(fstype));
+	fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';
 
-	bdev = malloc(sizeof(struct bdev));
-	if (!bdev)
-		return NULL;
-	memset(bdev, 0, sizeof(struct bdev));
-	bdev->ops = q->ops;
-	bdev->type = q->name;
-	if (mntopts)
-		bdev->mntopts = strdup(mntopts);
-	if (src)
-		bdev->src = strdup(src);
-	if (dst)
-		bdev->dest = strdup(dst);
-	if (strcmp(bdev->type, "nbd") == 0)
-		bdev->nbd_idx = conf->nbd_idx;
+	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
+	      cbarg->rootfs, cbarg->target, fstype);
 
-	return bdev;
-}
+	if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
+		free(mntdata);
+		return 0;
+	}
 
-bool bdev_is_dir(struct lxc_conf *conf, const char *path)
-{
-	struct bdev *orig = bdev_init(conf, path, NULL, NULL);
-	bool ret = false;
-	if (!orig)
-		return ret;
-	if (strcmp(orig->type, "dir") == 0)
-		ret = true;
-	bdev_put(orig);
-	return ret;
+	if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
+		DEBUG("mount failed with error: %s", strerror(errno));
+		free(mntdata);
+		return 0;
+	}
+
+	free(mntdata);
+
+	INFO("mounted '%s' on '%s', with fstype '%s'",
+	     cbarg->rootfs, cbarg->target, fstype);
+
+	return 1;
 }
 
-bool bdev_can_backup(struct lxc_conf *conf)
+static char *linkderef(char *path, char *dest)
 {
-	struct bdev *bdev = bdev_init(conf, NULL, NULL, NULL);
-	bool ret;
+	struct stat sbuf;
+	ssize_t ret;
 
-	if (!bdev)
-		return false;
-	ret = bdev->ops->can_backup;
-	bdev_put(bdev);
-	return ret;
+	ret = stat(path, &sbuf);
+	if (ret < 0)
+		return NULL;
+	if (!S_ISLNK(sbuf.st_mode))
+		return path;
+	ret = readlink(path, dest, MAXPATHLEN);
+	if (ret < 0) {
+		SYSERROR("error reading link %s", path);
+		return NULL;
+	} else if (ret >= MAXPATHLEN) {
+		ERROR("link in %s too long", path);
+		return NULL;
+	}
+	dest[ret] = '\0';
+	return dest;
 }
 
 /*
@@ -649,315 +962,3 @@ static bool unpriv_snap_allowed(struct bdev *b, const char *t, bool snap,
 		return true;
 	return false;
 }
-
-/*
- * If we're not snaphotting, then bdev_copy becomes a simple case of mount
- * the original, mount the new, and rsync the contents.
- */
-struct bdev *bdev_copy(struct lxc_container *c0, const char *cname,
-		const char *lxcpath, const char *bdevtype, int flags,
-		const char *bdevdata, uint64_t newsize, int *needs_rdep)
-{
-	struct bdev *orig, *new;
-	pid_t pid;
-	int ret;
-	bool snap = flags & LXC_CLONE_SNAPSHOT;
-	bool maybe_snap = flags & LXC_CLONE_MAYBE_SNAPSHOT;
-	bool keepbdevtype = flags & LXC_CLONE_KEEPBDEVTYPE;
-	const char *src = c0->lxc_conf->rootfs.path;
-	const char *oldname = c0->name;
-	const char *oldpath = c0->config_path;
-	struct rsync_data data;
-
-	/* if the container name doesn't show up in the rootfs path, then
-	 * we don't know how to come up with a new name
-	 */
-	if (strstr(src, oldname) == NULL) {
-		ERROR("original rootfs path %s doesn't include container name %s",
-			src, oldname);
-		return NULL;
-	}
-
-	orig = bdev_init(c0->lxc_conf, src, NULL, NULL);
-	if (!orig) {
-		ERROR("failed to detect blockdev type for %s", src);
-		return NULL;
-	}
-
-	if (!orig->dest) {
-		int ret;
-		size_t len;
-		struct stat sb;
-
-		len = strlen(oldpath) + strlen(oldname) + strlen("/rootfs") + 2;
-		orig->dest = malloc(len);
-		if (!orig->dest) {
-			ERROR("out of memory");
-			bdev_put(orig);
-			return NULL;
-		}
-		ret = snprintf(orig->dest, len, "%s/%s/rootfs", oldpath, oldname);
-		if (ret < 0 || ret >= len) {
-			ERROR("rootfs path too long");
-			bdev_put(orig);
-			return NULL;
-		}
-		ret = stat(orig->dest, &sb);
-		if (ret < 0 && errno == ENOENT)
-			if (mkdir_p(orig->dest, 0755) < 0)
-				WARN("Error creating '%s', continuing.", orig->dest);
-	}
-
-	/*
-	 * special case for snapshot - if caller requested maybe_snapshot and
-	 * keepbdevtype and backing store is directory, then proceed with a copy
-	 * clone rather than returning error
-	 */
-	if (maybe_snap && keepbdevtype && !bdevtype && !orig->ops->can_snapshot)
-		snap = false;
-
-	/*
-	 * If newtype is NULL and snapshot is set, then use overlayfs
-	 */
-	if (!bdevtype && !keepbdevtype && snap && strcmp(orig->type , "dir") == 0)
-		bdevtype = "overlayfs";
-
-	if (am_unpriv() && !unpriv_snap_allowed(orig, bdevtype, snap, maybe_snap)) {
-		ERROR("Unsupported snapshot type for unprivileged users");
-		bdev_put(orig);
-		return NULL;
-	}
-
-	*needs_rdep = 0;
-	if (bdevtype && strcmp(orig->type, "dir") == 0 &&
-			(strcmp(bdevtype, "aufs") == 0 ||
-			 strcmp(bdevtype, "overlayfs") == 0)) {
-		*needs_rdep = 1;
-	} else if (snap && strcmp(orig->type, "lvm") == 0 &&
-			!lvm_is_thin_volume(orig->src)) {
-		*needs_rdep = 1;
-	}
-
-	new = bdev_get(bdevtype ? bdevtype : orig->type);
-	if (!new) {
-		ERROR("no such block device type: %s", bdevtype ? bdevtype : orig->type);
-		bdev_put(orig);
-		return NULL;
-	}
-
-	if (new->ops->clone_paths(orig, new, oldname, cname, oldpath, lxcpath,
-				snap, newsize, c0->lxc_conf) < 0) {
-		ERROR("failed getting pathnames for cloned storage: %s", src);
-		goto err;
-	}
-
-	if (am_unpriv() && chown_mapped_root(new->src, c0->lxc_conf) < 0)
-		WARN("Failed to update ownership of %s", new->dest);
-
-	if (snap)
-		return new;
-
-	/*
-	 * https://github.com/lxc/lxc/issues/131
-	 * Use btrfs snapshot feature instead of rsync to restore if both orig and new are btrfs
-	 */
-	if (bdevtype &&
-			strcmp(orig->type, "btrfs") == 0 && strcmp(new->type, "btrfs") == 0 &&
-			btrfs_same_fs(orig->dest, new->dest) == 0) {
-		if (btrfs_destroy(new) < 0) {
-			ERROR("Error destroying %s subvolume", new->dest);
-			goto err;
-		}
-		if (mkdir_p(new->dest, 0755) < 0) {
-			ERROR("Error creating %s directory", new->dest);
-			goto err;
-		}
-		if (btrfs_snapshot(orig->dest, new->dest) < 0) {
-			ERROR("Error restoring %s to %s", orig->dest, new->dest);
-			goto err;
-		}
-		bdev_put(orig);
-		return new;
-	}
-
-	pid = fork();
-	if (pid < 0) {
-		SYSERROR("fork");
-		goto err;
-	}
-
-	if (pid > 0) {
-		int ret = wait_for_pid(pid);
-		bdev_put(orig);
-		if (ret < 0) {
-			bdev_put(new);
-			return NULL;
-		}
-		return new;
-	}
-
-	data.orig = orig;
-	data.new = new;
-	if (am_unpriv())
-		ret = userns_exec_1(c0->lxc_conf, rsync_rootfs_wrapper, &data);
-	else
-		ret = rsync_rootfs(&data);
-
-	exit(ret == 0 ? 0 : 1);
-
-err:
-	bdev_put(orig);
-	bdev_put(new);
-	return NULL;
-}
-
-static struct bdev *do_bdev_create(const char *dest, const char *type,
-		const char *cname, struct bdev_specs *specs)
-{
-
-	struct bdev *bdev = bdev_get(type);
-	if (!bdev) {
-		return NULL;
-	}
-
-	if (bdev->ops->create(bdev, dest, cname, specs) < 0) {
-		 bdev_put(bdev);
-		 return NULL;
-	}
-
-	return bdev;
-}
-
-/*
- * bdev_create:
- * Create a backing store for a container.
- * If successful, return a struct bdev *, with the bdev mounted and ready
- * for use.  Before completing, the caller will need to call the
- * umount operation and bdev_put().
- * @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs)
- * @type: the bdevtype (dir, btrfs, zfs, rbd, etc)
- * @cname: the container name
- * @specs: details about the backing store to create, like fstype
- */
-struct bdev *bdev_create(const char *dest, const char *type, const char *cname,
-		struct bdev_specs *specs)
-{
-	struct bdev *bdev;
-	char *best_options[] = {"btrfs", "zfs", "lvm", "dir", "rbd", NULL};
-
-	if (!type)
-		return do_bdev_create(dest, "dir", cname, specs);
-
-	if (strcmp(type, "best") == 0) {
-		int i;
-		// try for the best backing store type, according to our
-		// opinionated preferences
-		for (i=0; best_options[i]; i++) {
-			if ((bdev = do_bdev_create(dest, best_options[i], cname, specs)))
-				return bdev;
-		}
-		return NULL;  // 'dir' should never fail, so this shouldn't happen
-	}
-
-	// -B lvm,dir
-	if (strchr(type, ',') != NULL) {
-		char *dup = alloca(strlen(type)+1), *saveptr = NULL, *token;
-		strcpy(dup, type);
-		for (token = strtok_r(dup, ",", &saveptr); token;
-				token = strtok_r(NULL, ",", &saveptr)) {
-			if ((bdev = do_bdev_create(dest, token, cname, specs)))
-				return bdev;
-		}
-	}
-
-	return do_bdev_create(dest, type, cname, specs);
-}
-
-bool rootfs_is_blockdev(struct lxc_conf *conf)
-{
-	const struct bdev_type *q;
-	struct stat st;
-	int ret;
-
-	if (!conf->rootfs.path || strcmp(conf->rootfs.path, "/") == 0 ||
-		strlen(conf->rootfs.path) == 0)
-		return false;
-
-	ret = stat(conf->rootfs.path, &st);
-	if (ret == 0 && S_ISBLK(st.st_mode))
-		return true;
-	q = bdev_query(conf->rootfs.path);
-	if (!q)
-		return false;
-	if (strcmp(q->name, "lvm") == 0 ||
-		strcmp(q->name, "loop") == 0 ||
-		strcmp(q->name, "nbd") == 0)
-		return true;
-	return false;
-}
-
-bool bdev_destroy(struct lxc_conf *conf)
-{
-	struct bdev *r;
-	bool ret = false;
-
-	r = bdev_init(conf, conf->rootfs.path, conf->rootfs.mount, NULL);
-	if (!r)
-		return ret;
-
-	if (r->ops->destroy(r) == 0)
-		ret = true;
-	bdev_put(r);
-
-	return ret;
-}
-
-int bdev_destroy_wrapper(void *data)
-{
-	struct lxc_conf *conf = data;
-
-	if (setgid(0) < 0) {
-		ERROR("Failed to setgid to 0");
-		return -1;
-	}
-	if (setgroups(0, NULL) < 0)
-		WARN("Failed to clear groups");
-	if (setuid(0) < 0) {
-		ERROR("Failed to setuid to 0");
-		return -1;
-	}
-	if (!bdev_destroy(conf))
-		return -1;
-	else
-		return 0;
-}
-
-/*
- * attach_block_device returns true if all went well,
- * meaning either a block device was attached or was not
- * needed.  It returns false if something went wrong and
- * container startup should be stopped.
- */
-bool attach_block_device(struct lxc_conf *conf)
-{
-	char *path;
-
-	if (!conf->rootfs.path)
-		return true;
-	path = conf->rootfs.path;
-	if (!requires_nbd(path))
-		return true;
-	path = strchr(path, ':');
-	if (!path)
-		return false;
-	path++;
-	if (!attach_nbd(path, conf))
-		return false;
-	return true;
-}
-
-void detach_block_device(struct lxc_conf *conf)
-{
-	if (conf->nbd_idx != -1)
-		detach_nbd_idx(conf->nbd_idx);
-}