.can_backup = true,
};
+struct bdev_type { /* pairs a backing-store type name with its operations table */
+ const char *name; /* type name such as "dir" or "lvm"; compared with strcmp() by bdev_get() */
+ const struct bdev_ops *ops; /* operations for this type; bdev_query() calls ops->detect() */
+};
+
+static const struct bdev_type bdevs[] = { /* all known backing-store types; bdev_query() walks this front to back and returns the first entry whose detect() reports a match */
+ {.name = "zfs", .ops = &zfs_ops,},
+ {.name = "lvm", .ops = &lvm_ops,},
+ {.name = "rbd", .ops = &rbd_ops,},
+ {.name = "btrfs", .ops = &btrfs_ops,},
+ {.name = "dir", .ops = &dir_ops,},
+ {.name = "aufs", .ops = &aufs_ops,},
+ {.name = "overlayfs", .ops = &ovl_ops,},
+ {.name = "loop", .ops = &loop_ops,},
+ {.name = "nbd", .ops = &nbd_ops,},
+};
+
+static const size_t numbdevs = sizeof(bdevs) / sizeof(struct bdev_type); /* number of entries in bdevs */
+
/* helpers */
-/*
- * These are copied from conf.c. However as conf.c will be moved to using
- * the callback system, they can be pulled from there eventually, so we
- * don't need to pollute utils.c with these low level functions
- */
+static const struct bdev_type *bdev_query(const char *src);
+static struct bdev *bdev_get(const char *type);
+static struct bdev *do_bdev_create(const char *dest, const char *type,
+ const char *cname, struct bdev_specs *specs);
static int find_fstype_cb(char *buffer, void *data);
static char *linkderef(char *path, char *dest);
static bool unpriv_snap_allowed(struct bdev *b, const char *t, bool snap,
}
while ((p2 = strstr(src, oldname)) != NULL) {
- strncpy(p, src, p2-src); // copy text up to oldname
- p += p2-src; // move target pointer (p)
+ strncpy(p, src, p2 - src); // copy text up to oldname
+ p += p2 - src; // move target pointer (p)
p += sprintf(p, "%s", name); // print new name in place of oldname
src = p2 + l2; // move src to end of oldname
}
}
/*
- * return block size of dev->src in units of bytes
+ * attach_block_device returns true if all went well,
+ * meaning either a block device was attached or was not
+ * needed. It returns false if something went wrong and
+ * container startup should be stopped.
+ *
+ * @conf: container configuration; conf->rootfs.path is inspected here and
+ *        conf itself is passed on to attach_nbd().
*/
-int blk_getsize(struct bdev *bdev, uint64_t *size)
+bool attach_block_device(struct lxc_conf *conf)
{
- int fd, ret;
- char *path = bdev->src;
+ char *path;
- if (strcmp(bdev->type, "loop") == 0)
- path = bdev->src + 5;
+ if (!conf->rootfs.path)
+ return true; /* no rootfs configured, so nothing to attach */
+ path = conf->rootfs.path;
+ if (!requires_nbd(path))
+ return true; /* this rootfs does not need an nbd device */
+ path = strchr(path, ':');
+ if (!path)
+ return false; /* no ':' separator in the rootfs path */
+ path++; /* step past the ':' so path is the text that follows it */
+ if (!attach_nbd(path, conf))
+ return false;
+ return true;
+}
- fd = open(path, O_RDONLY);
- if (fd < 0)
- return -1;
+bool bdev_can_backup(struct lxc_conf *conf)
+{ /* report the can_backup flag of the backing store detected for conf's rootfs */
+ struct bdev *bdev = bdev_init(conf, NULL, NULL, NULL); /* NULL src makes bdev_init() fall back to conf->rootfs.path */
+ bool ret;
- ret = ioctl(fd, BLKGETSIZE64, size); // size of device in bytes
- close(fd);
+ if (!bdev)
+ return false; /* bdev_init() found no path, no matching type, or could not allocate */
+ ret = bdev->ops->can_backup;
+ bdev_put(bdev);
return ret;
}
/*
- * These are copied from conf.c. However as conf.c will be moved to using
- * the callback system, they can be pulled from there eventually, so we
- * don't need to pollute utils.c with these low level functions
+ * If we're not snapshotting, then bdev_copy becomes a simple case of mount
+ * the original, mount the new, and rsync the contents.
+ *
+ * @c0: the container being cloned; its name must occur in its rootfs path
+ * @cname: name of the new container
+ * @lxcpath: lxcpath handed to the clone_paths operation for the new container
+ * @bdevtype: requested backing-store type, or NULL to keep the original type
+ *            (a snapshot of a "dir" store defaults to "overlayfs" unless
+ *            LXC_CLONE_KEEPBDEVTYPE is set)
+ * @flags: LXC_CLONE_SNAPSHOT, LXC_CLONE_MAYBE_SNAPSHOT, LXC_CLONE_KEEPBDEVTYPE
+ * @bdevdata: not used by this function
+ * @newsize: size handed to the clone_paths operation
+ * @needs_rdep: set to 1 when the clone is an aufs/overlayfs on top of a "dir"
+ *              store or a snapshot of a non-thin lvm volume, otherwise to 0
+ *
+ * Returns the new bdev, which the caller releases with bdev_put(), or NULL on
+ * failure. The temporary bdev for the original is released on every path.
*/
-static int find_fstype_cb(char* buffer, void *data)
+struct bdev *bdev_copy(struct lxc_container *c0, const char *cname,
+ const char *lxcpath, const char *bdevtype, int flags,
+ const char *bdevdata, uint64_t newsize, int *needs_rdep)
{
- struct cbarg {
- const char *rootfs;
- const char *target;
- const char *options;
- } *cbarg = data;
+ struct bdev *orig, *new;
+ pid_t pid;
+ int ret;
+ bool snap = flags & LXC_CLONE_SNAPSHOT;
+ bool maybe_snap = flags & LXC_CLONE_MAYBE_SNAPSHOT;
+ bool keepbdevtype = flags & LXC_CLONE_KEEPBDEVTYPE;
+ const char *src = c0->lxc_conf->rootfs.path;
+ const char *oldname = c0->name;
+ const char *oldpath = c0->config_path;
+ struct rsync_data data;
- unsigned long mntflags;
- char *mntdata;
- char *fstype;
+ /* if the container name doesn't show up in the rootfs path, then
+ * we don't know how to come up with a new name
+ */
+ if (strstr(src, oldname) == NULL) {
+ ERROR("original rootfs path %s doesn't include container name %s",
+ src, oldname);
+ return NULL;
+ }
- /* we don't try 'nodev' entries */
- if (strstr(buffer, "nodev"))
- return 0;
+ orig = bdev_init(c0->lxc_conf, src, NULL, NULL);
+ if (!orig) {
+ ERROR("failed to detect blockdev type for %s", src);
+ return NULL;
+ }
- fstype = buffer;
- fstype += lxc_char_left_gc(fstype, strlen(fstype));
- fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';
+ if (!orig->dest) {
+ int ret;
+ size_t len;
+ struct stat sb;
- DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
- cbarg->rootfs, cbarg->target, fstype);
+ len = strlen(oldpath) + strlen(oldname) + strlen("/rootfs") + 2;
+ orig->dest = malloc(len);
+ if (!orig->dest) {
+ ERROR("out of memory");
+ bdev_put(orig);
+ return NULL;
+ }
+ ret = snprintf(orig->dest, len, "%s/%s/rootfs", oldpath, oldname);
+ if (ret < 0 || (size_t)ret >= len) {
+ ERROR("rootfs path too long");
+ bdev_put(orig);
+ return NULL;
+ }
+ ret = stat(orig->dest, &sb);
+ if (ret < 0 && errno == ENOENT)
+ if (mkdir_p(orig->dest, 0755) < 0)
+ WARN("Error creating '%s', continuing.", orig->dest);
+ }
- if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
- free(mntdata);
- return 0;
+ /*
+ * special case for snapshot - if caller requested maybe_snapshot and
+ * keepbdevtype and backing store is directory, then proceed with a copy
+ * clone rather than returning error
+ */
+ if (maybe_snap && keepbdevtype && !bdevtype && !orig->ops->can_snapshot)
+ snap = false;
+
+ /*
+ * If newtype is NULL and snapshot is set, then use overlayfs
+ */
+ if (!bdevtype && !keepbdevtype && snap && strcmp(orig->type , "dir") == 0)
+ bdevtype = "overlayfs";
+
+ if (am_unpriv() && !unpriv_snap_allowed(orig, bdevtype, snap, maybe_snap)) {
+ ERROR("Unsupported snapshot type for unprivileged users");
+ bdev_put(orig);
+ return NULL;
}
- if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
- DEBUG("mount failed with error: %s", strerror(errno));
- free(mntdata);
- return 0;
+ *needs_rdep = 0;
+ if (bdevtype && strcmp(orig->type, "dir") == 0 &&
+ (strcmp(bdevtype, "aufs") == 0 ||
+ strcmp(bdevtype, "overlayfs") == 0)) {
+ *needs_rdep = 1;
+ } else if (snap && strcmp(orig->type, "lvm") == 0 &&
+ !lvm_is_thin_volume(orig->src)) {
+ *needs_rdep = 1;
}
- free(mntdata);
+ new = bdev_get(bdevtype ? bdevtype : orig->type);
+ if (!new) {
+ ERROR("no such block device type: %s", bdevtype ? bdevtype : orig->type);
+ bdev_put(orig);
+ return NULL;
+ }
- INFO("mounted '%s' on '%s', with fstype '%s'",
- cbarg->rootfs, cbarg->target, fstype);
+ if (new->ops->clone_paths(orig, new, oldname, cname, oldpath, lxcpath,
+ snap, newsize, c0->lxc_conf) < 0) {
+ ERROR("failed getting pathnames for cloned storage: %s", src);
+ goto err;
+ }
- return 1;
-}
+ /* NOTE(review): the chown targets new->src but the warning names new->dest - confirm which is intended */
+ if (am_unpriv() && chown_mapped_root(new->src, c0->lxc_conf) < 0)
+ WARN("Failed to update ownership of %s", new->dest);
-int mount_unknown_fs(const char *rootfs, const char *target,
- const char *options)
-{
- struct cbarg {
- const char *rootfs;
- const char *target;
- const char *options;
- } cbarg = {
- .rootfs = rootfs,
- .target = target,
- .options = options,
- };
+ /*
+ * A snapshot needs no data copy. Release orig here, exactly as the
+ * other success paths below do before they return new; without this
+ * the bdev allocated by bdev_init() above is leaked.
+ */
+ if (snap) {
+ bdev_put(orig);
+ return new;
+ }
/*
- * find the filesystem type with brute force:
- * first we check with /etc/filesystems, in case the modules
- * are auto-loaded and fall back to the supported kernel fs
+ * https://github.com/lxc/lxc/issues/131
+ * Use btrfs snapshot feature instead of rsync to restore if both orig and new are btrfs
*/
- char *fsfile[] = {
- "/etc/filesystems",
- "/proc/filesystems",
- };
-
- size_t i;
- for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
-
- int ret;
+ if (bdevtype &&
+ strcmp(orig->type, "btrfs") == 0 && strcmp(new->type, "btrfs") == 0 &&
+ btrfs_same_fs(orig->dest, new->dest) == 0) {
+ if (btrfs_destroy(new) < 0) {
+ ERROR("Error destroying %s subvolume", new->dest);
+ goto err;
+ }
+ if (mkdir_p(new->dest, 0755) < 0) {
+ ERROR("Error creating %s directory", new->dest);
+ goto err;
+ }
+ if (btrfs_snapshot(orig->dest, new->dest) < 0) {
+ ERROR("Error restoring %s to %s", orig->dest, new->dest);
+ goto err;
+ }
+ bdev_put(orig);
+ return new;
+ }
- if (access(fsfile[i], F_OK))
- continue;
+ /* the copy itself runs in a child process; the parent only waits for it */
+ pid = fork();
+ if (pid < 0) {
+ SYSERROR("fork");
+ goto err;
+ }
- ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
+ if (pid > 0) {
+ int ret = wait_for_pid(pid);
+ bdev_put(orig);
if (ret < 0) {
- ERROR("failed to parse '%s'", fsfile[i]);
- return -1;
+ bdev_put(new);
+ return NULL;
}
-
- if (ret)
- return 0;
+ return new;
}
- ERROR("failed to determine fs type for '%s'", rootfs);
- return -1;
-}
+ /* child: rsync orig into new, then exit with 0 on success and 1 on failure */
+ data.orig = orig;
+ data.new = new;
+ if (am_unpriv())
+ ret = userns_exec_1(c0->lxc_conf, rsync_rootfs_wrapper, &data);
+ else
+ ret = rsync_rootfs(&data);
-int do_mkfs(const char *path, const char *fstype)
+ exit(ret == 0 ? 0 : 1);
+
+err:
+ bdev_put(orig);
+ bdev_put(new);
+ return NULL;
+}
+
+/*
+ * bdev_create:
+ * Create a backing store for a container.
+ * If successful, return a struct bdev *, with the bdev mounted and ready
+ * for use. Before completing, the caller will need to call the
+ * umount operation and bdev_put().
+ * @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs)
+ * @type: the bdevtype (dir, btrfs, zfs, rbd, etc). NULL means "dir", "best"
+ *        tries btrfs, zfs, lvm, dir and rbd in that order, and a
+ *        comma-separated list is tried entry by entry from the left.
+ * @cname: the container name
+ * @specs: details about the backing store to create, like fstype
+ * Returns NULL if no candidate type could be created.
+ */
+struct bdev *bdev_create(const char *dest, const char *type, const char *cname,
+ struct bdev_specs *specs)
{
- pid_t pid;
+ struct bdev *bdev;
+ char *best_options[] = {"btrfs", "zfs", "lvm", "dir", "rbd", NULL};
- if ((pid = fork()) < 0) {
- ERROR("error forking");
- return -1;
+ if (!type)
+ return do_bdev_create(dest, "dir", cname, specs);
+
+ if (strcmp(type, "best") == 0) {
+ int i;
+ // try for the best backing store type, according to our
+ // opinionated preferences
+ for (i = 0; best_options[i]; i++) {
+ if ((bdev = do_bdev_create(dest, best_options[i], cname, specs)))
+ return bdev;
+ }
+ return NULL; // 'dir' should never fail, so this shouldn't happen
}
- if (pid > 0)
- return wait_for_pid(pid);
- // If the file is not a block device, we don't want mkfs to ask
- // us about whether to proceed.
- if (null_stdfds() < 0)
- exit(1);
- execlp("mkfs", "mkfs", "-t", fstype, path, NULL);
- exit(1);
+ // -B lvm,dir
+ if (strchr(type, ',') != NULL) {
+ char *dup = alloca(strlen(type) + 1), *saveptr = NULL, *token; /* strtok_r() writes into its input, so split a stack copy of type */
+ strcpy(dup, type);
+ for (token = strtok_r(dup, ",", &saveptr); token;
+ token = strtok_r(NULL, ",", &saveptr)) {
+ if ((bdev = do_bdev_create(dest, token, cname, specs)))
+ return bdev; /* first entry that can be created wins */
+ }
+ }
+
+ return do_bdev_create(dest, type, cname, specs); /* single type; also reached with the unsplit string when every entry of a comma list failed */
}
-static char *linkderef(char *path, char *dest)
+bool bdev_destroy(struct lxc_conf *conf) /* run the destroy operation of conf's rootfs backing store; true only if it returned 0 */
{
- struct stat sbuf;
- ssize_t ret;
+ struct bdev *r;
+ bool ret = false;
- ret = stat(path, &sbuf);
- if (ret < 0)
+ r = bdev_init(conf, conf->rootfs.path, conf->rootfs.mount, NULL);
+ if (!r)
+ return ret; /* backing store could not be identified or allocated */
+
+ if (r->ops->destroy(r) == 0)
+ ret = true;
+ bdev_put(r); /* released whether or not destroy succeeded */
+
+ return ret;
+}
+
+int bdev_destroy_wrapper(void *data)
+{ /* switch to uid/gid 0, then call bdev_destroy(); returns 0 on success and -1 on failure */
+ struct lxc_conf *conf = data; /* data is expected to point at a struct lxc_conf */
+
+ if (setgid(0) < 0) {
+ ERROR("Failed to setgid to 0");
+ return -1;
+ }
+ if (setgroups(0, NULL) < 0) /* failing to drop supplementary groups is only warned about */
+ WARN("Failed to clear groups");
+ if (setuid(0) < 0) {
+ ERROR("Failed to setuid to 0");
+ return -1;
+ }
+ if (!bdev_destroy(conf)) /* map the bool result onto the 0 / -1 convention */
+ return -1;
+ else
+ return 0;
+}
+
+/*
+ * bdev_init:
+ * Detect the backing-store type of a source path and allocate a struct bdev
+ * describing it.
+ * @conf: container configuration; supplies rootfs.path when @src is NULL and
+ *        nbd_idx when the detected type is "nbd"
+ * @src: source path to probe, or NULL to use conf->rootfs.path
+ * @dst: mount destination to record in the bdev, or NULL
+ * @mntopts: mount options to record in the bdev, or NULL
+ * Returns the new bdev, which the caller releases with bdev_put(), or NULL
+ * when no source path is available, no known type detects it, or memory
+ * cannot be allocated.
+ */
+struct bdev *bdev_init(struct lxc_conf *conf, const char *src, const char *dst,
+ const char *mntopts)
+{
+ struct bdev *bdev;
+ const struct bdev_type *q;
+
+ if (!src)
+ src = conf->rootfs.path;
+
+ if (!src)
return NULL;
- if (!S_ISLNK(sbuf.st_mode))
- return path;
- ret = readlink(path, dest, MAXPATHLEN);
- if (ret < 0) {
- SYSERROR("error reading link %s", path);
+
+ q = bdev_query(src);
+ if (!q)
return NULL;
- } else if (ret >= MAXPATHLEN) {
- ERROR("link in %s too long", path);
+
+ bdev = malloc(sizeof(struct bdev));
+ if (!bdev)
return NULL;
- }
- dest[ret] = '\0';
- return dest;
+ memset(bdev, 0, sizeof(struct bdev));
+ bdev->ops = q->ops;
+ bdev->type = q->name;
+ if (mntopts)
+ bdev->mntopts = strdup(mntopts);
+ bdev->src = strdup(src); /* src is known to be non-NULL here */
+ if (dst)
+ bdev->dest = strdup(dst);
+ /* a failed strdup() must not hand back a half-initialised bdev */
+ if ((mntopts && !bdev->mntopts) || !bdev->src || (dst && !bdev->dest)) {
+ bdev_put(bdev);
+ return NULL;
+ }
+ if (strcmp(bdev->type, "nbd") == 0)
+ bdev->nbd_idx = conf->nbd_idx;
+
+ return bdev;
+}
+
+bool bdev_is_dir(struct lxc_conf *conf, const char *path)
+{ /* true when the backing store detected for path has type "dir" */
+ struct bdev *orig = bdev_init(conf, path, NULL, NULL); /* a NULL path makes bdev_init() fall back to conf->rootfs.path */
+ bool ret = false;
+ if (!orig)
+ return ret; /* detection failed: reported as "not a dir" */
+ if (strcmp(orig->type, "dir") == 0)
+ ret = true;
+ bdev_put(orig);
+ return ret;
+}
+
+void bdev_put(struct bdev *bdev)
+{ /* free a bdev together with its mntopts, src and dest strings; bdev itself must not be NULL because it is dereferenced unconditionally */
+ free(bdev->mntopts);
+ free(bdev->src);
+ free(bdev->dest);
+ free(bdev);
+}
+
+/*
+ * Store the size, in bytes, of the block device behind bdev->src in *size.
+ * Returns -1 if the device cannot be opened, otherwise the result of the
+ * BLKGETSIZE64 ioctl.
+ */
+int blk_getsize(struct bdev *bdev, uint64_t *size)
+{
+ int fd, ret;
+ char *path = bdev->src;
+
+ if (strcmp(bdev->type, "loop") == 0)
+ path = bdev->src + 5; /* skip the first 5 characters of src - presumably a "loop:" prefix, TODO confirm */
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ ret = ioctl(fd, BLKGETSIZE64, size); // size of device in bytes
+ close(fd);
+ return ret;
+}
+
+void detach_block_device(struct lxc_conf *conf)
+{ /* detach the nbd device recorded in conf, if any */
+ if (conf->nbd_idx != -1) /* -1 is treated as "no nbd device attached" */
+ detach_nbd_idx(conf->nbd_idx);
}
/*
int status;
close(p[1]);
memset(type, 0, len);
- ret = read(p[0], type, len-1);
+ ret = read(p[0], type, len - 1);
close(p[0]);
if (ret < 0) {
SYSERROR("error reading from pipe");
return -1;
}
wait(&status);
- type[len-1] = '\0';
+ type[len - 1] = '\0';
INFO("detected fstype %s for %s", type, srcdev);
return ret;
}
*sp1 = '\0';
if (strcmp(line, l))
continue;
- sp2 = strchr(sp1+1, ' ');
+ sp2 = strchr(sp1 + 1, ' ');
if (!sp2)
exit(1);
*sp2 = '\0';
- sp3 = strchr(sp2+1, ' ');
+ sp3 = strchr(sp2 + 1, ' ');
if (!sp3)
exit(1);
*sp3 = '\0';
exit(1);
}
-struct bdev_type {
- const char *name;
- const struct bdev_ops *ops;
-};
+int do_mkfs(const char *path, const char *fstype)
+{ /* run "mkfs -t <fstype> <path>" in a child process; returns -1 if fork() fails, otherwise wait_for_pid()'s result for the child */
+ pid_t pid;
+
+ if ((pid = fork()) < 0) {
+ ERROR("error forking");
+ return -1;
+ }
+ if (pid > 0)
+ return wait_for_pid(pid); /* parent: wait for the mkfs child */
+
+ // If the file is not a block device, we don't want mkfs to ask
+ // us about whether to proceed.
+ if (null_stdfds() < 0)
+ exit(1);
+ execlp("mkfs", "mkfs", "-t", fstype, path, NULL);
+ exit(1); /* only reached when execlp() itself failed */
+}
-// this will return 1 for physical disks, qemu-nbd, loop, etc
-// right now only lvm is a block device
+/*
+ * This will return 1 for physical disks, qemu-nbd, loop, etc. Right now only
+ * lvm is a block device.
+ */
int is_blktype(struct bdev *b)
{
if (strcmp(b->type, "lvm") == 0)
return 0;
}
-static const struct bdev_type bdevs[] = {
- {.name = "zfs", .ops = &zfs_ops,},
- {.name = "lvm", .ops = &lvm_ops,},
- {.name = "rbd", .ops = &rbd_ops,},
- {.name = "btrfs", .ops = &btrfs_ops,},
- {.name = "dir", .ops = &dir_ops,},
- {.name = "aufs", .ops = &aufs_ops,},
- {.name = "overlayfs", .ops = &ovl_ops,},
- {.name = "loop", .ops = &loop_ops,},
- {.name = "nbd", .ops = &nbd_ops,},
-};
+int mount_unknown_fs(const char *rootfs, const char *target,
+ const char *options)
+{ /* mount rootfs on target by trying the filesystem types listed in fsfile[] one by one; returns 0 once a mount succeeds and -1 otherwise */
+ struct cbarg {
+ const char *rootfs;
+ const char *target;
+ const char *options;
+ } cbarg = { /* bundle passed to find_fstype_cb(), which declares a struct with the same members */
+ .rootfs = rootfs,
+ .target = target,
+ .options = options,
+ };
-static const size_t numbdevs = sizeof(bdevs) / sizeof(struct bdev_type);
+ /*
+ * find the filesystem type with brute force:
+ * first we check with /etc/filesystems, in case the modules
+ * are auto-loaded and fall back to the supported kernel fs
+ */
+ char *fsfile[] = {
+ "/etc/filesystems",
+ "/proc/filesystems",
+ };
-void bdev_put(struct bdev *bdev)
-{
- free(bdev->mntopts);
- free(bdev->src);
- free(bdev->dest);
- free(bdev);
-}
+ size_t i;
+ for (i = 0; i < sizeof(fsfile) / sizeof(fsfile[0]); i++) {
-struct bdev *bdev_get(const char *type)
-{
- int i;
- struct bdev *bdev;
+ int ret;
- for (i=0; i<numbdevs; i++) {
- if (strcmp(bdevs[i].name, type) == 0)
- break;
- }
- if (i == numbdevs)
- return NULL;
+ if (access(fsfile[i], F_OK)) /* skip a list file that is not accessible */
+ continue;
+
+ ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
+ if (ret < 0) {
+ ERROR("failed to parse '%s'", fsfile[i]);
+ return -1;
+ }
+
+ if (ret) /* a positive result is taken to mean the callback mounted the rootfs */
+ return 0;
+ }
+
+ ERROR("failed to determine fs type for '%s'", rootfs);
+ return -1;
+}
+
+bool rootfs_is_blockdev(struct lxc_conf *conf)
+{ /* true when conf's rootfs path is a block special file or is detected as type "lvm", "loop" or "nbd" */
+ const struct bdev_type *q;
+ struct stat st;
+ int ret;
+
+ if (!conf->rootfs.path || strcmp(conf->rootfs.path, "/") == 0 ||
+ strlen(conf->rootfs.path) == 0)
+ return false; /* unset, "/" and empty paths never count as block devices */
+
+ ret = stat(conf->rootfs.path, &st);
+ if (ret == 0 && S_ISBLK(st.st_mode))
+ return true;
+ q = bdev_query(conf->rootfs.path); /* otherwise decide from the detected backing-store type */
+ if (!q)
+ return false;
+ if (strcmp(q->name, "lvm") == 0 ||
+ strcmp(q->name, "loop") == 0 ||
+ strcmp(q->name, "nbd") == 0)
+ return true;
+ return false;
+}
+
+static struct bdev *do_bdev_create(const char *dest, const char *type,
+ const char *cname, struct bdev_specs *specs)
+{ /* get a bdev of the named type and run its create operation; returns NULL if bdev_get() fails or create() reports an error */
+
+ struct bdev *bdev = bdev_get(type);
+ if (!bdev) {
+ return NULL;
+ }
+
+ if (bdev->ops->create(bdev, dest, cname, specs) < 0) {
+ bdev_put(bdev); /* creation failed: release the bdev before reporting failure */
+ return NULL;
+ }
+
+ return bdev;
+}
+
+static struct bdev *bdev_get(const char *type)
+{
+ int i;
+ struct bdev *bdev;
+
+ for (i = 0; i < numbdevs; i++) {
+ if (strcmp(bdevs[i].name, type) == 0)
+ break;
+ }
+ if (i == numbdevs)
+ return NULL;
bdev = malloc(sizeof(struct bdev));
if (!bdev)
return NULL;
static const struct bdev_type *bdev_query(const char *src)
{
int i;
- for (i=0; i<numbdevs; i++) {
+ for (i = 0; i < numbdevs; i++) {
int r;
r = bdevs[i].ops->detect(src);
if (r)
return &bdevs[i];
}
-struct bdev *bdev_init(struct lxc_conf *conf, const char *src, const char *dst,
- const char *mntopts)
+/*
+ * These are copied from conf.c. However as conf.c will be moved to using
+ * the callback system, they can be pulled from there eventually, so we
+ * don't need to pollute utils.c with these low level functions
+ *
+ * find_fstype_cb is the per-line callback that mount_unknown_fs() hands to
+ * lxc_file_for_each_line().
+ * @buffer: one line of the filesystem list; used, after trimming, as the
+ *          filesystem type to try
+ * @data: points at a struct holding the rootfs, target and options strings
+ * Returns 1 when the mount succeeded and 0 when this line was skipped or the
+ * attempt failed.
+ */
+static int find_fstype_cb(char* buffer, void *data)
{
- struct bdev *bdev;
- const struct bdev_type *q;
+ struct cbarg {
+ const char *rootfs;
+ const char *target;
+ const char *options;
+ } *cbarg = data;
- if (!src)
- src = conf->rootfs.path;
+ unsigned long mntflags;
+ char *mntdata;
+ char *fstype;
- if (!src)
- return NULL;
+ /* we don't try 'nodev' entries */
+ if (strstr(buffer, "nodev"))
+ return 0;
- q = bdev_query(src);
- if (!q)
- return NULL;
+ fstype = buffer;
+ fstype += lxc_char_left_gc(fstype, strlen(fstype)); /* presumably skips leading whitespace - TODO confirm helper semantics */
+ fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0'; /* presumably cuts trailing whitespace - TODO confirm helper semantics */
- bdev = malloc(sizeof(struct bdev));
- if (!bdev)
- return NULL;
- memset(bdev, 0, sizeof(struct bdev));
- bdev->ops = q->ops;
- bdev->type = q->name;
- if (mntopts)
- bdev->mntopts = strdup(mntopts);
- if (src)
- bdev->src = strdup(src);
- if (dst)
- bdev->dest = strdup(dst);
- if (strcmp(bdev->type, "nbd") == 0)
- bdev->nbd_idx = conf->nbd_idx;
+ DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
+ cbarg->rootfs, cbarg->target, fstype);
- return bdev;
-}
+ if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
+ free(mntdata); /* NOTE(review): mntdata has no initializer here, so this relies on parse_mntopts() always storing to it even on failure - confirm */
+ return 0;
+ }
-bool bdev_is_dir(struct lxc_conf *conf, const char *path)
-{
- struct bdev *orig = bdev_init(conf, path, NULL, NULL);
- bool ret = false;
- if (!orig)
- return ret;
- if (strcmp(orig->type, "dir") == 0)
- ret = true;
- bdev_put(orig);
- return ret;
+ if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
+ DEBUG("mount failed with error: %s", strerror(errno));
+ free(mntdata);
+ return 0; /* 0 rather than an error, so the caller can go on to the next line */
+ }
+
+ free(mntdata);
+
+ INFO("mounted '%s' on '%s', with fstype '%s'",
+ cbarg->rootfs, cbarg->target, fstype);
+
+ return 1;
}
-bool bdev_can_backup(struct lxc_conf *conf)
+static char *linkderef(char *path, char *dest) /* return path itself when it is not reported as a symlink, otherwise the link target written to dest; NULL on error. dest must have room for MAXPATHLEN bytes */
{
- struct bdev *bdev = bdev_init(conf, NULL, NULL, NULL);
- bool ret;
+ struct stat sbuf;
+ ssize_t ret;
- if (!bdev)
- return false;
- ret = bdev->ops->can_backup;
- bdev_put(bdev);
- return ret;
+ ret = stat(path, &sbuf); /* NOTE(review): stat() follows symlinks, so the S_ISLNK test below is not expected to match; lstat() may have been intended - confirm */
+ if (ret < 0)
+ return NULL;
+ if (!S_ISLNK(sbuf.st_mode))
+ return path;
+ ret = readlink(path, dest, MAXPATHLEN);
+ if (ret < 0) {
+ SYSERROR("error reading link %s", path);
+ return NULL;
+ } else if (ret >= MAXPATHLEN) { /* a full buffer may mean a truncated target and leaves no room for the terminator */
+ ERROR("link in %s too long", path);
+ return NULL;
+ }
+ dest[ret] = '\0'; /* readlink() does not NUL-terminate */
+ return dest;
}
/*
return true;
return false;
}
-
-/*
- * If we're not snaphotting, then bdev_copy becomes a simple case of mount
- * the original, mount the new, and rsync the contents.
- */
-struct bdev *bdev_copy(struct lxc_container *c0, const char *cname,
- const char *lxcpath, const char *bdevtype, int flags,
- const char *bdevdata, uint64_t newsize, int *needs_rdep)
-{
- struct bdev *orig, *new;
- pid_t pid;
- int ret;
- bool snap = flags & LXC_CLONE_SNAPSHOT;
- bool maybe_snap = flags & LXC_CLONE_MAYBE_SNAPSHOT;
- bool keepbdevtype = flags & LXC_CLONE_KEEPBDEVTYPE;
- const char *src = c0->lxc_conf->rootfs.path;
- const char *oldname = c0->name;
- const char *oldpath = c0->config_path;
- struct rsync_data data;
-
- /* if the container name doesn't show up in the rootfs path, then
- * we don't know how to come up with a new name
- */
- if (strstr(src, oldname) == NULL) {
- ERROR("original rootfs path %s doesn't include container name %s",
- src, oldname);
- return NULL;
- }
-
- orig = bdev_init(c0->lxc_conf, src, NULL, NULL);
- if (!orig) {
- ERROR("failed to detect blockdev type for %s", src);
- return NULL;
- }
-
- if (!orig->dest) {
- int ret;
- size_t len;
- struct stat sb;
-
- len = strlen(oldpath) + strlen(oldname) + strlen("/rootfs") + 2;
- orig->dest = malloc(len);
- if (!orig->dest) {
- ERROR("out of memory");
- bdev_put(orig);
- return NULL;
- }
- ret = snprintf(orig->dest, len, "%s/%s/rootfs", oldpath, oldname);
- if (ret < 0 || ret >= len) {
- ERROR("rootfs path too long");
- bdev_put(orig);
- return NULL;
- }
- ret = stat(orig->dest, &sb);
- if (ret < 0 && errno == ENOENT)
- if (mkdir_p(orig->dest, 0755) < 0)
- WARN("Error creating '%s', continuing.", orig->dest);
- }
-
- /*
- * special case for snapshot - if caller requested maybe_snapshot and
- * keepbdevtype and backing store is directory, then proceed with a copy
- * clone rather than returning error
- */
- if (maybe_snap && keepbdevtype && !bdevtype && !orig->ops->can_snapshot)
- snap = false;
-
- /*
- * If newtype is NULL and snapshot is set, then use overlayfs
- */
- if (!bdevtype && !keepbdevtype && snap && strcmp(orig->type , "dir") == 0)
- bdevtype = "overlayfs";
-
- if (am_unpriv() && !unpriv_snap_allowed(orig, bdevtype, snap, maybe_snap)) {
- ERROR("Unsupported snapshot type for unprivileged users");
- bdev_put(orig);
- return NULL;
- }
-
- *needs_rdep = 0;
- if (bdevtype && strcmp(orig->type, "dir") == 0 &&
- (strcmp(bdevtype, "aufs") == 0 ||
- strcmp(bdevtype, "overlayfs") == 0)) {
- *needs_rdep = 1;
- } else if (snap && strcmp(orig->type, "lvm") == 0 &&
- !lvm_is_thin_volume(orig->src)) {
- *needs_rdep = 1;
- }
-
- new = bdev_get(bdevtype ? bdevtype : orig->type);
- if (!new) {
- ERROR("no such block device type: %s", bdevtype ? bdevtype : orig->type);
- bdev_put(orig);
- return NULL;
- }
-
- if (new->ops->clone_paths(orig, new, oldname, cname, oldpath, lxcpath,
- snap, newsize, c0->lxc_conf) < 0) {
- ERROR("failed getting pathnames for cloned storage: %s", src);
- goto err;
- }
-
- if (am_unpriv() && chown_mapped_root(new->src, c0->lxc_conf) < 0)
- WARN("Failed to update ownership of %s", new->dest);
-
- if (snap)
- return new;
-
- /*
- * https://github.com/lxc/lxc/issues/131
- * Use btrfs snapshot feature instead of rsync to restore if both orig and new are btrfs
- */
- if (bdevtype &&
- strcmp(orig->type, "btrfs") == 0 && strcmp(new->type, "btrfs") == 0 &&
- btrfs_same_fs(orig->dest, new->dest) == 0) {
- if (btrfs_destroy(new) < 0) {
- ERROR("Error destroying %s subvolume", new->dest);
- goto err;
- }
- if (mkdir_p(new->dest, 0755) < 0) {
- ERROR("Error creating %s directory", new->dest);
- goto err;
- }
- if (btrfs_snapshot(orig->dest, new->dest) < 0) {
- ERROR("Error restoring %s to %s", orig->dest, new->dest);
- goto err;
- }
- bdev_put(orig);
- return new;
- }
-
- pid = fork();
- if (pid < 0) {
- SYSERROR("fork");
- goto err;
- }
-
- if (pid > 0) {
- int ret = wait_for_pid(pid);
- bdev_put(orig);
- if (ret < 0) {
- bdev_put(new);
- return NULL;
- }
- return new;
- }
-
- data.orig = orig;
- data.new = new;
- if (am_unpriv())
- ret = userns_exec_1(c0->lxc_conf, rsync_rootfs_wrapper, &data);
- else
- ret = rsync_rootfs(&data);
-
- exit(ret == 0 ? 0 : 1);
-
-err:
- bdev_put(orig);
- bdev_put(new);
- return NULL;
-}
-
-static struct bdev *do_bdev_create(const char *dest, const char *type,
- const char *cname, struct bdev_specs *specs)
-{
-
- struct bdev *bdev = bdev_get(type);
- if (!bdev) {
- return NULL;
- }
-
- if (bdev->ops->create(bdev, dest, cname, specs) < 0) {
- bdev_put(bdev);
- return NULL;
- }
-
- return bdev;
-}
-
-/*
- * bdev_create:
- * Create a backing store for a container.
- * If successful, return a struct bdev *, with the bdev mounted and ready
- * for use. Before completing, the caller will need to call the
- * umount operation and bdev_put().
- * @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs)
- * @type: the bdevtype (dir, btrfs, zfs, rbd, etc)
- * @cname: the container name
- * @specs: details about the backing store to create, like fstype
- */
-struct bdev *bdev_create(const char *dest, const char *type, const char *cname,
- struct bdev_specs *specs)
-{
- struct bdev *bdev;
- char *best_options[] = {"btrfs", "zfs", "lvm", "dir", "rbd", NULL};
-
- if (!type)
- return do_bdev_create(dest, "dir", cname, specs);
-
- if (strcmp(type, "best") == 0) {
- int i;
- // try for the best backing store type, according to our
- // opinionated preferences
- for (i=0; best_options[i]; i++) {
- if ((bdev = do_bdev_create(dest, best_options[i], cname, specs)))
- return bdev;
- }
- return NULL; // 'dir' should never fail, so this shouldn't happen
- }
-
- // -B lvm,dir
- if (strchr(type, ',') != NULL) {
- char *dup = alloca(strlen(type)+1), *saveptr = NULL, *token;
- strcpy(dup, type);
- for (token = strtok_r(dup, ",", &saveptr); token;
- token = strtok_r(NULL, ",", &saveptr)) {
- if ((bdev = do_bdev_create(dest, token, cname, specs)))
- return bdev;
- }
- }
-
- return do_bdev_create(dest, type, cname, specs);
-}
-
-bool rootfs_is_blockdev(struct lxc_conf *conf)
-{
- const struct bdev_type *q;
- struct stat st;
- int ret;
-
- if (!conf->rootfs.path || strcmp(conf->rootfs.path, "/") == 0 ||
- strlen(conf->rootfs.path) == 0)
- return false;
-
- ret = stat(conf->rootfs.path, &st);
- if (ret == 0 && S_ISBLK(st.st_mode))
- return true;
- q = bdev_query(conf->rootfs.path);
- if (!q)
- return false;
- if (strcmp(q->name, "lvm") == 0 ||
- strcmp(q->name, "loop") == 0 ||
- strcmp(q->name, "nbd") == 0)
- return true;
- return false;
-}
-
-bool bdev_destroy(struct lxc_conf *conf)
-{
- struct bdev *r;
- bool ret = false;
-
- r = bdev_init(conf, conf->rootfs.path, conf->rootfs.mount, NULL);
- if (!r)
- return ret;
-
- if (r->ops->destroy(r) == 0)
- ret = true;
- bdev_put(r);
-
- return ret;
-}
-
-int bdev_destroy_wrapper(void *data)
-{
- struct lxc_conf *conf = data;
-
- if (setgid(0) < 0) {
- ERROR("Failed to setgid to 0");
- return -1;
- }
- if (setgroups(0, NULL) < 0)
- WARN("Failed to clear groups");
- if (setuid(0) < 0) {
- ERROR("Failed to setuid to 0");
- return -1;
- }
- if (!bdev_destroy(conf))
- return -1;
- else
- return 0;
-}
-
-/*
- * attach_block_device returns true if all went well,
- * meaning either a block device was attached or was not
- * needed. It returns false if something went wrong and
- * container startup should be stopped.
- */
-bool attach_block_device(struct lxc_conf *conf)
-{
- char *path;
-
- if (!conf->rootfs.path)
- return true;
- path = conf->rootfs.path;
- if (!requires_nbd(path))
- return true;
- path = strchr(path, ':');
- if (!path)
- return false;
- path++;
- if (!attach_nbd(path, conf))
- return false;
- return true;
-}
-
-void detach_block_device(struct lxc_conf *conf)
-{
- if (conf->nbd_idx != -1)
- detach_nbd_idx(conf->nbd_idx);
-}