From: Christian Brauner Date: Wed, 30 Dec 2015 16:34:08 +0000 (+0100) Subject: Cleanup bdev.c after splitting into modules X-Git-Tag: lxc-2.0.0.beta2~65^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cdb4e53a7df61f5f3c4a4b03ae5990cb4c86263e;p=thirdparty%2Flxc.git Cleanup bdev.c after splitting into modules The function - bdev_get(); becomes static. It is called from nowhere else so far and never appeared in any header. Minor changes - Avoid comparisons between int and size_t types. Use size_t where possible else cast to size_t when it makes sense. - insert missing spaces between operators - put declarations for all static functions at the top Signed-off-by: Christian Brauner --- diff --git a/src/lxc/bdev/bdev.c b/src/lxc/bdev/bdev.c index 96b0a5967..14f4a9e76 100644 --- a/src/lxc/bdev/bdev.c +++ b/src/lxc/bdev/bdev.c @@ -189,12 +189,30 @@ static const struct bdev_ops zfs_ops = { .can_backup = true, }; +struct bdev_type { + const char *name; + const struct bdev_ops *ops; +}; + +static const struct bdev_type bdevs[] = { + {.name = "zfs", .ops = &zfs_ops,}, + {.name = "lvm", .ops = &lvm_ops,}, + {.name = "rbd", .ops = &rbd_ops,}, + {.name = "btrfs", .ops = &btrfs_ops,}, + {.name = "dir", .ops = &dir_ops,}, + {.name = "aufs", .ops = &aufs_ops,}, + {.name = "overlayfs", .ops = &ovl_ops,}, + {.name = "loop", .ops = &loop_ops,}, + {.name = "nbd", .ops = &nbd_ops,}, +}; + +static const size_t numbdevs = sizeof(bdevs) / sizeof(struct bdev_type); + /* helpers */ -/* - * These are copied from conf.c. 
However as conf.c will be moved to using - * the callback system, they can be pulled from there eventually, so we - * don't need to pollute utils.c with these low level functions - */ +static const struct bdev_type *bdev_query(const char *src); +static struct bdev *bdev_get(const char *type); +static struct bdev *do_bdev_create(const char *dest, const char *type, + const char *cname, struct bdev_specs *specs); static int find_fstype_cb(char *buffer, void *data); static char *linkderef(char *path, char *dest); static bool unpriv_snap_allowed(struct bdev *b, const char *t, bool snap, @@ -233,8 +251,8 @@ char *dir_new_path(char *src, const char *oldname, const char *name, } while ((p2 = strstr(src, oldname)) != NULL) { - strncpy(p, src, p2-src); // copy text up to oldname - p += p2-src; // move target pointer (p) + strncpy(p, src, p2 - src); // copy text up to oldname + p += p2 - src; // move target pointer (p) p += sprintf(p, "%s", name); // print new name in place of oldname src = p2 + l2; // move src to end of oldname } @@ -243,156 +261,361 @@ char *dir_new_path(char *src, const char *oldname, const char *name, } /* - * return block size of dev->src in units of bytes + * attach_block_device returns true if all went well, + * meaning either a block device was attached or was not + * needed. It returns false if something went wrong and + * container startup should be stopped. 
*/ -int blk_getsize(struct bdev *bdev, uint64_t *size) +bool attach_block_device(struct lxc_conf *conf) { - int fd, ret; - char *path = bdev->src; + char *path; - if (strcmp(bdev->type, "loop") == 0) - path = bdev->src + 5; + if (!conf->rootfs.path) + return true; + path = conf->rootfs.path; + if (!requires_nbd(path)) + return true; + path = strchr(path, ':'); + if (!path) + return false; + path++; + if (!attach_nbd(path, conf)) + return false; + return true; +} - fd = open(path, O_RDONLY); - if (fd < 0) - return -1; +bool bdev_can_backup(struct lxc_conf *conf) +{ + struct bdev *bdev = bdev_init(conf, NULL, NULL, NULL); + bool ret; - ret = ioctl(fd, BLKGETSIZE64, size); // size of device in bytes - close(fd); + if (!bdev) + return false; + ret = bdev->ops->can_backup; + bdev_put(bdev); return ret; } /* - * These are copied from conf.c. However as conf.c will be moved to using - * the callback system, they can be pulled from there eventually, so we - * don't need to pollute utils.c with these low level functions + * If we're not snapshotting, then bdev_copy becomes a simple case of mount + * the original, mount the new, and rsync the contents. 
*/ -static int find_fstype_cb(char* buffer, void *data) +struct bdev *bdev_copy(struct lxc_container *c0, const char *cname, + const char *lxcpath, const char *bdevtype, int flags, + const char *bdevdata, uint64_t newsize, int *needs_rdep) { - struct cbarg { - const char *rootfs; - const char *target; - const char *options; - } *cbarg = data; + struct bdev *orig, *new; + pid_t pid; + int ret; + bool snap = flags & LXC_CLONE_SNAPSHOT; + bool maybe_snap = flags & LXC_CLONE_MAYBE_SNAPSHOT; + bool keepbdevtype = flags & LXC_CLONE_KEEPBDEVTYPE; + const char *src = c0->lxc_conf->rootfs.path; + const char *oldname = c0->name; + const char *oldpath = c0->config_path; + struct rsync_data data; - unsigned long mntflags; - char *mntdata; - char *fstype; + /* if the container name doesn't show up in the rootfs path, then + * we don't know how to come up with a new name + */ + if (strstr(src, oldname) == NULL) { + ERROR("original rootfs path %s doesn't include container name %s", + src, oldname); + return NULL; + } - /* we don't try 'nodev' entries */ - if (strstr(buffer, "nodev")) - return 0; + orig = bdev_init(c0->lxc_conf, src, NULL, NULL); + if (!orig) { + ERROR("failed to detect blockdev type for %s", src); + return NULL; + } - fstype = buffer; - fstype += lxc_char_left_gc(fstype, strlen(fstype)); - fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0'; + if (!orig->dest) { + int ret; + size_t len; + struct stat sb; - DEBUG("trying to mount '%s'->'%s' with fstype '%s'", - cbarg->rootfs, cbarg->target, fstype); + len = strlen(oldpath) + strlen(oldname) + strlen("/rootfs") + 2; + orig->dest = malloc(len); + if (!orig->dest) { + ERROR("out of memory"); + bdev_put(orig); + return NULL; + } + ret = snprintf(orig->dest, len, "%s/%s/rootfs", oldpath, oldname); + if (ret < 0 || (size_t)ret >= len) { + ERROR("rootfs path too long"); + bdev_put(orig); + return NULL; + } + ret = stat(orig->dest, &sb); + if (ret < 0 && errno == ENOENT) + if (mkdir_p(orig->dest, 0755) < 0) + 
WARN("Error creating '%s', continuing.", orig->dest); + } - if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) { - free(mntdata); - return 0; + /* + * special case for snapshot - if caller requested maybe_snapshot and + * keepbdevtype and backing store is directory, then proceed with a copy + * clone rather than returning error + */ + if (maybe_snap && keepbdevtype && !bdevtype && !orig->ops->can_snapshot) + snap = false; + + /* + * If newtype is NULL and snapshot is set, then use overlayfs + */ + if (!bdevtype && !keepbdevtype && snap && strcmp(orig->type , "dir") == 0) + bdevtype = "overlayfs"; + + if (am_unpriv() && !unpriv_snap_allowed(orig, bdevtype, snap, maybe_snap)) { + ERROR("Unsupported snapshot type for unprivileged users"); + bdev_put(orig); + return NULL; } - if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) { - DEBUG("mount failed with error: %s", strerror(errno)); - free(mntdata); - return 0; + *needs_rdep = 0; + if (bdevtype && strcmp(orig->type, "dir") == 0 && + (strcmp(bdevtype, "aufs") == 0 || + strcmp(bdevtype, "overlayfs") == 0)) { + *needs_rdep = 1; + } else if (snap && strcmp(orig->type, "lvm") == 0 && + !lvm_is_thin_volume(orig->src)) { + *needs_rdep = 1; } - free(mntdata); + new = bdev_get(bdevtype ? bdevtype : orig->type); + if (!new) { + ERROR("no such block device type: %s", bdevtype ? 
bdevtype : orig->type); + bdev_put(orig); + return NULL; + } - INFO("mounted '%s' on '%s', with fstype '%s'", - cbarg->rootfs, cbarg->target, fstype); + if (new->ops->clone_paths(orig, new, oldname, cname, oldpath, lxcpath, + snap, newsize, c0->lxc_conf) < 0) { + ERROR("failed getting pathnames for cloned storage: %s", src); + goto err; + } - return 1; -} + if (am_unpriv() && chown_mapped_root(new->src, c0->lxc_conf) < 0) + WARN("Failed to update ownership of %s", new->dest); -int mount_unknown_fs(const char *rootfs, const char *target, - const char *options) -{ - struct cbarg { - const char *rootfs; - const char *target; - const char *options; - } cbarg = { - .rootfs = rootfs, - .target = target, - .options = options, - }; + if (snap) + return new; /* - * find the filesystem type with brute force: - * first we check with /etc/filesystems, in case the modules - * are auto-loaded and fall back to the supported kernel fs + * https://github.com/lxc/lxc/issues/131 + * Use btrfs snapshot feature instead of rsync to restore if both orig and new are btrfs */ - char *fsfile[] = { - "/etc/filesystems", - "/proc/filesystems", - }; - - size_t i; - for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) { - - int ret; + if (bdevtype && + strcmp(orig->type, "btrfs") == 0 && strcmp(new->type, "btrfs") == 0 && + btrfs_same_fs(orig->dest, new->dest) == 0) { + if (btrfs_destroy(new) < 0) { + ERROR("Error destroying %s subvolume", new->dest); + goto err; + } + if (mkdir_p(new->dest, 0755) < 0) { + ERROR("Error creating %s directory", new->dest); + goto err; + } + if (btrfs_snapshot(orig->dest, new->dest) < 0) { + ERROR("Error restoring %s to %s", orig->dest, new->dest); + goto err; + } + bdev_put(orig); + return new; + } - if (access(fsfile[i], F_OK)) - continue; + pid = fork(); + if (pid < 0) { + SYSERROR("fork"); + goto err; + } - ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg); + if (pid > 0) { + int ret = wait_for_pid(pid); + bdev_put(orig); if (ret < 0) { - 
ERROR("failed to parse '%s'", fsfile[i]); - return -1; + bdev_put(new); + return NULL; } - - if (ret) - return 0; + return new; } - ERROR("failed to determine fs type for '%s'", rootfs); - return -1; -} + data.orig = orig; + data.new = new; + if (am_unpriv()) + ret = userns_exec_1(c0->lxc_conf, rsync_rootfs_wrapper, &data); + else + ret = rsync_rootfs(&data); -int do_mkfs(const char *path, const char *fstype) + exit(ret == 0 ? 0 : 1); + +err: + bdev_put(orig); + bdev_put(new); + return NULL; +} + +/* + * bdev_create: + * Create a backing store for a container. + * If successful, return a struct bdev *, with the bdev mounted and ready + * for use. Before completing, the caller will need to call the + * umount operation and bdev_put(). + * @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs) + * @type: the bdevtype (dir, btrfs, zfs, rbd, etc) + * @cname: the container name + * @specs: details about the backing store to create, like fstype + */ +struct bdev *bdev_create(const char *dest, const char *type, const char *cname, + struct bdev_specs *specs) { - pid_t pid; + struct bdev *bdev; + char *best_options[] = {"btrfs", "zfs", "lvm", "dir", "rbd", NULL}; - if ((pid = fork()) < 0) { - ERROR("error forking"); - return -1; + if (!type) + return do_bdev_create(dest, "dir", cname, specs); + + if (strcmp(type, "best") == 0) { + int i; + // try for the best backing store type, according to our + // opinionated preferences + for (i = 0; best_options[i]; i++) { + if ((bdev = do_bdev_create(dest, best_options[i], cname, specs))) + return bdev; + } + return NULL; // 'dir' should never fail, so this shouldn't happen } - if (pid > 0) - return wait_for_pid(pid); - // If the file is not a block device, we don't want mkfs to ask - // us about whether to proceed. 
- if (null_stdfds() < 0) - exit(1); - execlp("mkfs", "mkfs", "-t", fstype, path, NULL); - exit(1); + // -B lvm,dir + if (strchr(type, ',') != NULL) { + char *dup = alloca(strlen(type) + 1), *saveptr = NULL, *token; + strcpy(dup, type); + for (token = strtok_r(dup, ",", &saveptr); token; + token = strtok_r(NULL, ",", &saveptr)) { + if ((bdev = do_bdev_create(dest, token, cname, specs))) + return bdev; + } + } + + return do_bdev_create(dest, type, cname, specs); } -static char *linkderef(char *path, char *dest) +bool bdev_destroy(struct lxc_conf *conf) { - struct stat sbuf; - ssize_t ret; + struct bdev *r; + bool ret = false; - ret = stat(path, &sbuf); - if (ret < 0) + r = bdev_init(conf, conf->rootfs.path, conf->rootfs.mount, NULL); + if (!r) + return ret; + + if (r->ops->destroy(r) == 0) + ret = true; + bdev_put(r); + + return ret; +} + +int bdev_destroy_wrapper(void *data) +{ + struct lxc_conf *conf = data; + + if (setgid(0) < 0) { + ERROR("Failed to setgid to 0"); + return -1; + } + if (setgroups(0, NULL) < 0) + WARN("Failed to clear groups"); + if (setuid(0) < 0) { + ERROR("Failed to setuid to 0"); + return -1; + } + if (!bdev_destroy(conf)) + return -1; + else + return 0; +} + +struct bdev *bdev_init(struct lxc_conf *conf, const char *src, const char *dst, + const char *mntopts) +{ + struct bdev *bdev; + const struct bdev_type *q; + + if (!src) + src = conf->rootfs.path; + + if (!src) return NULL; - if (!S_ISLNK(sbuf.st_mode)) - return path; - ret = readlink(path, dest, MAXPATHLEN); - if (ret < 0) { - SYSERROR("error reading link %s", path); + + q = bdev_query(src); + if (!q) return NULL; - } else if (ret >= MAXPATHLEN) { - ERROR("link in %s too long", path); + + bdev = malloc(sizeof(struct bdev)); + if (!bdev) return NULL; - } - dest[ret] = '\0'; - return dest; + memset(bdev, 0, sizeof(struct bdev)); + bdev->ops = q->ops; + bdev->type = q->name; + if (mntopts) + bdev->mntopts = strdup(mntopts); + if (src) + bdev->src = strdup(src); + if (dst) + bdev->dest = 
strdup(dst); + if (strcmp(bdev->type, "nbd") == 0) + bdev->nbd_idx = conf->nbd_idx; + + return bdev; +} + +bool bdev_is_dir(struct lxc_conf *conf, const char *path) +{ + struct bdev *orig = bdev_init(conf, path, NULL, NULL); + bool ret = false; + if (!orig) + return ret; + if (strcmp(orig->type, "dir") == 0) + ret = true; + bdev_put(orig); + return ret; +} + +void bdev_put(struct bdev *bdev) +{ + free(bdev->mntopts); + free(bdev->src); + free(bdev->dest); + free(bdev); +} + +/* + * return block size of dev->src in units of bytes + */ +int blk_getsize(struct bdev *bdev, uint64_t *size) +{ + int fd, ret; + char *path = bdev->src; + + if (strcmp(bdev->type, "loop") == 0) + path = bdev->src + 5; + + fd = open(path, O_RDONLY); + if (fd < 0) + return -1; + + ret = ioctl(fd, BLKGETSIZE64, size); // size of device in bytes + close(fd); + return ret; +} + +void detach_block_device(struct lxc_conf *conf) +{ + if (conf->nbd_idx != -1) + detach_nbd_idx(conf->nbd_idx); } /* @@ -428,7 +651,7 @@ int detect_fs(struct bdev *bdev, char *type, int len) int status; close(p[1]); memset(type, 0, len); - ret = read(p[0], type, len-1); + ret = read(p[0], type, len - 1); close(p[0]); if (ret < 0) { SYSERROR("error reading from pipe"); @@ -440,7 +663,7 @@ int detect_fs(struct bdev *bdev, char *type, int len) return -1; } wait(&status); - type[len-1] = '\0'; + type[len - 1] = '\0'; INFO("detected fstype %s for %s", type, srcdev); return ret; } @@ -475,11 +698,11 @@ int detect_fs(struct bdev *bdev, char *type, int len) *sp1 = '\0'; if (strcmp(line, l)) continue; - sp2 = strchr(sp1+1, ' '); + sp2 = strchr(sp1 + 1, ' '); if (!sp2) exit(1); *sp2 = '\0'; - sp3 = strchr(sp2+1, ' '); + sp3 = strchr(sp2 + 1, ' '); if (!sp3) exit(1); *sp3 = '\0'; @@ -491,13 +714,29 @@ int detect_fs(struct bdev *bdev, char *type, int len) exit(1); } -struct bdev_type { - const char *name; - const struct bdev_ops *ops; -}; +int do_mkfs(const char *path, const char *fstype) +{ + pid_t pid; + + if ((pid = fork()) < 0) { 
+ ERROR("error forking"); + return -1; + } + if (pid > 0) + return wait_for_pid(pid); + + // If the file is not a block device, we don't want mkfs to ask + // us about whether to proceed. + if (null_stdfds() < 0) + exit(1); + execlp("mkfs", "mkfs", "-t", fstype, path, NULL); + exit(1); +} -// this will return 1 for physical disks, qemu-nbd, loop, etc -// right now only lvm is a block device +/* + * This will return 1 for physical disks, qemu-nbd, loop, etc. Right now only lvm + * is a block device. + */ int is_blktype(struct bdev *b) { if (strcmp(b->type, "lvm") == 0) @@ -505,39 +744,102 @@ int is_blktype(struct bdev *b) return 0; } -static const struct bdev_type bdevs[] = { - {.name = "zfs", .ops = &zfs_ops,}, - {.name = "lvm", .ops = &lvm_ops,}, - {.name = "rbd", .ops = &rbd_ops,}, - {.name = "btrfs", .ops = &btrfs_ops,}, - {.name = "dir", .ops = &dir_ops,}, - {.name = "aufs", .ops = &aufs_ops,}, - {.name = "overlayfs", .ops = &ovl_ops,}, - {.name = "loop", .ops = &loop_ops,}, - {.name = "nbd", .ops = &nbd_ops,}, -}; +int mount_unknown_fs(const char *rootfs, const char *target, + const char *options) +{ + struct cbarg { + const char *rootfs; + const char *target; + const char *options; + } cbarg = { + .rootfs = rootfs, + .target = target, + .options = options, + }; -static const size_t numbdevs = sizeof(bdevs) / sizeof(struct bdev_type); + /* + * find the filesystem type with brute force: + * first we check with /etc/filesystems, in case the modules + * are auto-loaded and fall back to the supported kernel fs + */ + char *fsfile[] = { + "/etc/filesystems", + "/proc/filesystems", + }; -void bdev_put(struct bdev *bdev) -{ - free(bdev->mntopts); - free(bdev->src); - free(bdev->dest); - free(bdev); -} + size_t i; + for (i = 0; i < sizeof(fsfile) / sizeof(fsfile[0]); i++) { -struct bdev *bdev_get(const char *type) -{ - int i; - struct bdev *bdev; + int ret; - for (i=0; irootfs.path || strcmp(conf->rootfs.path, "/") == 0 || + strlen(conf->rootfs.path) == 0) + return 
false; + + ret = stat(conf->rootfs.path, &st); + if (ret == 0 && S_ISBLK(st.st_mode)) + return true; + q = bdev_query(conf->rootfs.path); + if (!q) + return false; + if (strcmp(q->name, "lvm") == 0 || + strcmp(q->name, "loop") == 0 || + strcmp(q->name, "nbd") == 0) + return true; + return false; +} + +static struct bdev *do_bdev_create(const char *dest, const char *type, + const char *cname, struct bdev_specs *specs) +{ + + struct bdev *bdev = bdev_get(type); + if (!bdev) { + return NULL; + } + + if (bdev->ops->create(bdev, dest, cname, specs) < 0) { + bdev_put(bdev); + return NULL; + } + + return bdev; +} + +static struct bdev *bdev_get(const char *type) +{ + int i; + struct bdev *bdev; + + for (i = 0; i < numbdevs; i++) { + if (strcmp(bdevs[i].name, type) == 0) + break; + } + if (i == numbdevs) + return NULL; bdev = malloc(sizeof(struct bdev)); if (!bdev) return NULL; @@ -550,7 +852,7 @@ struct bdev *bdev_get(const char *type) static const struct bdev_type *bdev_query(const char *src) { int i; - for (i=0; idetect(src); if (r) @@ -562,62 +864,73 @@ static const struct bdev_type *bdev_query(const char *src) return &bdevs[i]; } -struct bdev *bdev_init(struct lxc_conf *conf, const char *src, const char *dst, - const char *mntopts) +/* + * These are copied from conf.c. 
However as conf.c will be moved to using + * the callback system, they can be pulled from there eventually, so we + * don't need to pollute utils.c with these low level functions + */ +static int find_fstype_cb(char* buffer, void *data) { - struct bdev *bdev; - const struct bdev_type *q; + struct cbarg { + const char *rootfs; + const char *target; + const char *options; + } *cbarg = data; - if (!src) - src = conf->rootfs.path; + unsigned long mntflags; + char *mntdata; + char *fstype; - if (!src) - return NULL; + /* we don't try 'nodev' entries */ + if (strstr(buffer, "nodev")) + return 0; - q = bdev_query(src); - if (!q) - return NULL; + fstype = buffer; + fstype += lxc_char_left_gc(fstype, strlen(fstype)); + fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0'; - bdev = malloc(sizeof(struct bdev)); - if (!bdev) - return NULL; - memset(bdev, 0, sizeof(struct bdev)); - bdev->ops = q->ops; - bdev->type = q->name; - if (mntopts) - bdev->mntopts = strdup(mntopts); - if (src) - bdev->src = strdup(src); - if (dst) - bdev->dest = strdup(dst); - if (strcmp(bdev->type, "nbd") == 0) - bdev->nbd_idx = conf->nbd_idx; + DEBUG("trying to mount '%s'->'%s' with fstype '%s'", + cbarg->rootfs, cbarg->target, fstype); - return bdev; -} + if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) { + free(mntdata); + return 0; + } -bool bdev_is_dir(struct lxc_conf *conf, const char *path) -{ - struct bdev *orig = bdev_init(conf, path, NULL, NULL); - bool ret = false; - if (!orig) - return ret; - if (strcmp(orig->type, "dir") == 0) - ret = true; - bdev_put(orig); - return ret; + if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) { + DEBUG("mount failed with error: %s", strerror(errno)); + free(mntdata); + return 0; + } + + free(mntdata); + + INFO("mounted '%s' on '%s', with fstype '%s'", + cbarg->rootfs, cbarg->target, fstype); + + return 1; } -bool bdev_can_backup(struct lxc_conf *conf) +static char *linkderef(char *path, char *dest) { - struct bdev *bdev = 
bdev_init(conf, NULL, NULL, NULL); - bool ret; + struct stat sbuf; + ssize_t ret; - if (!bdev) - return false; - ret = bdev->ops->can_backup; - bdev_put(bdev); - return ret; + ret = stat(path, &sbuf); + if (ret < 0) + return NULL; + if (!S_ISLNK(sbuf.st_mode)) + return path; + ret = readlink(path, dest, MAXPATHLEN); + if (ret < 0) { + SYSERROR("error reading link %s", path); + return NULL; + } else if (ret >= MAXPATHLEN) { + ERROR("link in %s too long", path); + return NULL; + } + dest[ret] = '\0'; + return dest; } /* @@ -649,315 +962,3 @@ static bool unpriv_snap_allowed(struct bdev *b, const char *t, bool snap, return true; return false; } - -/* - * If we're not snaphotting, then bdev_copy becomes a simple case of mount - * the original, mount the new, and rsync the contents. - */ -struct bdev *bdev_copy(struct lxc_container *c0, const char *cname, - const char *lxcpath, const char *bdevtype, int flags, - const char *bdevdata, uint64_t newsize, int *needs_rdep) -{ - struct bdev *orig, *new; - pid_t pid; - int ret; - bool snap = flags & LXC_CLONE_SNAPSHOT; - bool maybe_snap = flags & LXC_CLONE_MAYBE_SNAPSHOT; - bool keepbdevtype = flags & LXC_CLONE_KEEPBDEVTYPE; - const char *src = c0->lxc_conf->rootfs.path; - const char *oldname = c0->name; - const char *oldpath = c0->config_path; - struct rsync_data data; - - /* if the container name doesn't show up in the rootfs path, then - * we don't know how to come up with a new name - */ - if (strstr(src, oldname) == NULL) { - ERROR("original rootfs path %s doesn't include container name %s", - src, oldname); - return NULL; - } - - orig = bdev_init(c0->lxc_conf, src, NULL, NULL); - if (!orig) { - ERROR("failed to detect blockdev type for %s", src); - return NULL; - } - - if (!orig->dest) { - int ret; - size_t len; - struct stat sb; - - len = strlen(oldpath) + strlen(oldname) + strlen("/rootfs") + 2; - orig->dest = malloc(len); - if (!orig->dest) { - ERROR("out of memory"); - bdev_put(orig); - return NULL; - } - ret = 
snprintf(orig->dest, len, "%s/%s/rootfs", oldpath, oldname); - if (ret < 0 || ret >= len) { - ERROR("rootfs path too long"); - bdev_put(orig); - return NULL; - } - ret = stat(orig->dest, &sb); - if (ret < 0 && errno == ENOENT) - if (mkdir_p(orig->dest, 0755) < 0) - WARN("Error creating '%s', continuing.", orig->dest); - } - - /* - * special case for snapshot - if caller requested maybe_snapshot and - * keepbdevtype and backing store is directory, then proceed with a copy - * clone rather than returning error - */ - if (maybe_snap && keepbdevtype && !bdevtype && !orig->ops->can_snapshot) - snap = false; - - /* - * If newtype is NULL and snapshot is set, then use overlayfs - */ - if (!bdevtype && !keepbdevtype && snap && strcmp(orig->type , "dir") == 0) - bdevtype = "overlayfs"; - - if (am_unpriv() && !unpriv_snap_allowed(orig, bdevtype, snap, maybe_snap)) { - ERROR("Unsupported snapshot type for unprivileged users"); - bdev_put(orig); - return NULL; - } - - *needs_rdep = 0; - if (bdevtype && strcmp(orig->type, "dir") == 0 && - (strcmp(bdevtype, "aufs") == 0 || - strcmp(bdevtype, "overlayfs") == 0)) { - *needs_rdep = 1; - } else if (snap && strcmp(orig->type, "lvm") == 0 && - !lvm_is_thin_volume(orig->src)) { - *needs_rdep = 1; - } - - new = bdev_get(bdevtype ? bdevtype : orig->type); - if (!new) { - ERROR("no such block device type: %s", bdevtype ? 
bdevtype : orig->type); - bdev_put(orig); - return NULL; - } - - if (new->ops->clone_paths(orig, new, oldname, cname, oldpath, lxcpath, - snap, newsize, c0->lxc_conf) < 0) { - ERROR("failed getting pathnames for cloned storage: %s", src); - goto err; - } - - if (am_unpriv() && chown_mapped_root(new->src, c0->lxc_conf) < 0) - WARN("Failed to update ownership of %s", new->dest); - - if (snap) - return new; - - /* - * https://github.com/lxc/lxc/issues/131 - * Use btrfs snapshot feature instead of rsync to restore if both orig and new are btrfs - */ - if (bdevtype && - strcmp(orig->type, "btrfs") == 0 && strcmp(new->type, "btrfs") == 0 && - btrfs_same_fs(orig->dest, new->dest) == 0) { - if (btrfs_destroy(new) < 0) { - ERROR("Error destroying %s subvolume", new->dest); - goto err; - } - if (mkdir_p(new->dest, 0755) < 0) { - ERROR("Error creating %s directory", new->dest); - goto err; - } - if (btrfs_snapshot(orig->dest, new->dest) < 0) { - ERROR("Error restoring %s to %s", orig->dest, new->dest); - goto err; - } - bdev_put(orig); - return new; - } - - pid = fork(); - if (pid < 0) { - SYSERROR("fork"); - goto err; - } - - if (pid > 0) { - int ret = wait_for_pid(pid); - bdev_put(orig); - if (ret < 0) { - bdev_put(new); - return NULL; - } - return new; - } - - data.orig = orig; - data.new = new; - if (am_unpriv()) - ret = userns_exec_1(c0->lxc_conf, rsync_rootfs_wrapper, &data); - else - ret = rsync_rootfs(&data); - - exit(ret == 0 ? 0 : 1); - -err: - bdev_put(orig); - bdev_put(new); - return NULL; -} - -static struct bdev *do_bdev_create(const char *dest, const char *type, - const char *cname, struct bdev_specs *specs) -{ - - struct bdev *bdev = bdev_get(type); - if (!bdev) { - return NULL; - } - - if (bdev->ops->create(bdev, dest, cname, specs) < 0) { - bdev_put(bdev); - return NULL; - } - - return bdev; -} - -/* - * bdev_create: - * Create a backing store for a container. - * If successful, return a struct bdev *, with the bdev mounted and ready - * for use. 
Before completing, the caller will need to call the - * umount operation and bdev_put(). - * @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs) - * @type: the bdevtype (dir, btrfs, zfs, rbd, etc) - * @cname: the container name - * @specs: details about the backing store to create, like fstype - */ -struct bdev *bdev_create(const char *dest, const char *type, const char *cname, - struct bdev_specs *specs) -{ - struct bdev *bdev; - char *best_options[] = {"btrfs", "zfs", "lvm", "dir", "rbd", NULL}; - - if (!type) - return do_bdev_create(dest, "dir", cname, specs); - - if (strcmp(type, "best") == 0) { - int i; - // try for the best backing store type, according to our - // opinionated preferences - for (i=0; best_options[i]; i++) { - if ((bdev = do_bdev_create(dest, best_options[i], cname, specs))) - return bdev; - } - return NULL; // 'dir' should never fail, so this shouldn't happen - } - - // -B lvm,dir - if (strchr(type, ',') != NULL) { - char *dup = alloca(strlen(type)+1), *saveptr = NULL, *token; - strcpy(dup, type); - for (token = strtok_r(dup, ",", &saveptr); token; - token = strtok_r(NULL, ",", &saveptr)) { - if ((bdev = do_bdev_create(dest, token, cname, specs))) - return bdev; - } - } - - return do_bdev_create(dest, type, cname, specs); -} - -bool rootfs_is_blockdev(struct lxc_conf *conf) -{ - const struct bdev_type *q; - struct stat st; - int ret; - - if (!conf->rootfs.path || strcmp(conf->rootfs.path, "/") == 0 || - strlen(conf->rootfs.path) == 0) - return false; - - ret = stat(conf->rootfs.path, &st); - if (ret == 0 && S_ISBLK(st.st_mode)) - return true; - q = bdev_query(conf->rootfs.path); - if (!q) - return false; - if (strcmp(q->name, "lvm") == 0 || - strcmp(q->name, "loop") == 0 || - strcmp(q->name, "nbd") == 0) - return true; - return false; -} - -bool bdev_destroy(struct lxc_conf *conf) -{ - struct bdev *r; - bool ret = false; - - r = bdev_init(conf, conf->rootfs.path, conf->rootfs.mount, NULL); - if (!r) - return ret; - - if (r->ops->destroy(r) 
== 0) - ret = true; - bdev_put(r); - - return ret; -} - -int bdev_destroy_wrapper(void *data) -{ - struct lxc_conf *conf = data; - - if (setgid(0) < 0) { - ERROR("Failed to setgid to 0"); - return -1; - } - if (setgroups(0, NULL) < 0) - WARN("Failed to clear groups"); - if (setuid(0) < 0) { - ERROR("Failed to setuid to 0"); - return -1; - } - if (!bdev_destroy(conf)) - return -1; - else - return 0; -} - -/* - * attach_block_device returns true if all went well, - * meaning either a block device was attached or was not - * needed. It returns false if something went wrong and - * container startup should be stopped. - */ -bool attach_block_device(struct lxc_conf *conf) -{ - char *path; - - if (!conf->rootfs.path) - return true; - path = conf->rootfs.path; - if (!requires_nbd(path)) - return true; - path = strchr(path, ':'); - if (!path) - return false; - path++; - if (!attach_nbd(path, conf)) - return false; - return true; -} - -void detach_block_device(struct lxc_conf *conf) -{ - if (conf->nbd_idx != -1) - detach_nbd_idx(conf->nbd_idx); -}