From: Christian Brauner Date: Sat, 12 Dec 2015 21:48:33 +0000 (+0100) Subject: Split bdev into modules: overlay X-Git-Tag: lxc-2.0.0.beta1~6^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=38683db41204cb221892beeb88d67a1df4a07287;p=thirdparty%2Flxc.git Split bdev into modules: overlay With this commit we start to split bdev.{c,h} into modules located in the subfolder bdev. We start by creating a module for overlay: overlay.{c,h}. - The functions: - overlayfs_detect() - overlayfs_mount() - overlayfs_umount() - overlayfs_clonepaths() - overlayfs_destroy() - overlayfs_create() move from bdev.{c,h} to overlay.{c,h}. The only thing that remains in bdev.c is the static definition of - static const struct bdev_ops overlayfs_ops - The functions: - update_ovl_paths() - overlay_getlower() move from lxccontainer.c to overlay.{c,h}. update_ovl_paths() is used to update absolute paths for overlay lxc.mount.entry entries but it seems to fit better here than in lxccontainer.c. The function overlay_getlower() is used to extract the lower directory for overlay (and aufs) rootfs. It should at some point become a common helper. - The functions: - do_rsync() - dir_new_path() remain in bdev.c for now but become extern. We declare them extern in overlay.c to be able to call them. As the comment on them correctly notes, they should at some point become common helpers and probably move to utils.{c,h} or some other more appropriate place. - The structs: - struct bdev; /* defined in bdev.h */ - struct bdev_specs; /* defined in lxccontainer.h */ - struct lxc_conf; /* defined in conf.h */ are forward declared/put as incomplete types in overlay.h so that the functions have access to them. - The header overlay.h is *not* included in bdev.h but only in bdev.c so that when bdev.h is included the public functions in overlay.h cannot be accessed, i.e. if an implementation wants to call functions from overlay.h they need to explicitly include it. (As is e.g. 
done in the case of lxccontainer.c.) - The header - lxc-btrfs.h also moves to the bdev subfolder. - Adapt Makefile.am to the new bdev layout. Signed-off-by: Christian Brauner --- diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am index 695308266..44945d070 100644 --- a/src/lxc/Makefile.am +++ b/src/lxc/Makefile.am @@ -6,7 +6,9 @@ pkginclude_HEADERS = \ noinst_HEADERS = \ arguments.h \ attach.h \ - bdev.h \ + bdev/bdev.h \ + bdev/lxc-btrfs.h \ + bdev/overlay.h \ caps.h \ cgroup.h \ conf.h \ @@ -16,7 +18,6 @@ noinst_HEADERS = \ list.h \ log.h \ lxc.h \ - lxc-btrfs.h \ lxclock.h \ monitor.h \ namespace.h \ @@ -60,7 +61,7 @@ endif liblxc_so_SOURCES = \ arguments.c arguments.h \ - bdev.c bdev.h lxc-btrfs.h \ + bdev/bdev.c bdev/bdev.h bdev/overlay.c bdev/overlay.h bdev/lxc-btrfs.h \ commands.c commands.h \ start.c start.h \ execute.c \ diff --git a/src/lxc/bdev.c b/src/lxc/bdev/bdev.c similarity index 86% rename from src/lxc/bdev.c rename to src/lxc/bdev/bdev.c index ae06caef0..479a09ac5 100644 --- a/src/lxc/bdev.c +++ b/src/lxc/bdev/bdev.c @@ -27,33 +27,38 @@ * I'm doing by calling out to userspace should sometimes be done through * libraries like liblvm2 */ + #define _GNU_SOURCE -#include -#include -#include -#include -#include -#include +#include #include -#include -#include -#include +#include +#include +#include #include +#include +#include +#include +#include +#include #include -#include +#include #include +#include +#include +#include -#include "lxc.h" +#include "bdev.h" #include "config.h" #include "conf.h" -#include "bdev.h" -#include "log.h" #include "error.h" -#include "utils.h" -#include "namespace.h" -#include "parse.h" +#include "log.h" +#include "lxc.h" #include "lxclock.h" #include "lxc-btrfs.h" +#include "namespace.h" +#include "overlay.h" /* overlay */ +#include "parse.h" +#include "utils.h" #ifndef BLKGETSIZE64 #define BLKGETSIZE64 _IOR(0x12,114,size_t) @@ -72,17 +77,13 @@ lxc_log_define(bdev, lxc); -struct ovl_rsync_data { - struct bdev *orig; - 
struct bdev *new; -}; - struct rsync_data_char { char *src; char *dest; }; -static int do_rsync(const char *src, const char *dest) +/* the bulk of this needs to become a common helper */ +int do_rsync(const char *src, const char *dest) { // call out to rsync pid_t pid; @@ -107,6 +108,48 @@ static int do_rsync(const char *src, const char *dest) exit(1); } +/* the bulk of this needs to become a common helper */ +char *dir_new_path(char *src, const char *oldname, const char *name, + const char *oldpath, const char *lxcpath) +{ + char *ret, *p, *p2; + int l1, l2, nlen; + + nlen = strlen(src) + 1; + l1 = strlen(oldpath); + p = src; + /* if src starts with oldpath, look for oldname only after + * that path */ + if (strncmp(src, oldpath, l1) == 0) { + p += l1; + nlen += (strlen(lxcpath) - l1); + } + l2 = strlen(oldname); + while ((p = strstr(p, oldname)) != NULL) { + p += l2; + nlen += strlen(name) - l2; + } + + ret = malloc(nlen); + if (!ret) + return NULL; + + p = ret; + if (strncmp(src, oldpath, l1) == 0) { + p += sprintf(p, "%s", lxcpath); + src += l1; + } + + while ((p2 = strstr(src, oldname)) != NULL) { + strncpy(p, src, p2-src); // copy text up to oldname + p += p2-src; // move target pointer (p) + p += sprintf(p, "%s", name); // print new name in place of oldname + src = p2 + l2; // move src to end of oldname + } + sprintf(p, "%s", src); // copy the rest of src + return ret; +} + /* * return block size of dev->src in units of bytes */ @@ -404,48 +447,6 @@ static int dir_umount(struct bdev *bdev) return umount(bdev->dest); } -/* the bulk of this needs to become a common helper */ -static char *dir_new_path(char *src, const char *oldname, const char *name, - const char *oldpath, const char *lxcpath) -{ - char *ret, *p, *p2; - int l1, l2, nlen; - - nlen = strlen(src) + 1; - l1 = strlen(oldpath); - p = src; - /* if src starts with oldpath, look for oldname only after - * that path */ - if (strncmp(src, oldpath, l1) == 0) { - p += l1; - nlen += (strlen(lxcpath) - l1); 
- } - l2 = strlen(oldname); - while ((p = strstr(p, oldname)) != NULL) { - p += l2; - nlen += strlen(name) - l2; - } - - ret = malloc(nlen); - if (!ret) - return NULL; - - p = ret; - if (strncmp(src, oldpath, l1) == 0) { - p += sprintf(p, "%s", lxcpath); - src += l1; - } - - while ((p2 = strstr(src, oldname)) != NULL) { - strncpy(p, src, p2-src); // copy text up to oldname - p += p2-src; // move target pointer (p) - p += sprintf(p, "%s", name); // print new name in place of oldname - src = p2 + l2; // move src to end of oldname - } - sprintf(p, "%s", src); // copy the rest of src - return ret; -} - /* * for a simple directory bind mount, we substitute the old container * name and paths for the new @@ -2321,158 +2322,6 @@ static const struct bdev_ops loop_ops = { .can_backup = true, }; -// -// overlayfs ops -// - -static int overlayfs_detect(const char *path) -{ - if (strncmp(path, "overlayfs:", 10) == 0) - return 1; // take their word for it - return 0; -} - -static char *overlayfs_name; -static char *detect_overlayfs_name(void) -{ - char *v = "overlayfs"; - char *line = NULL; - size_t len = 0; - FILE *f = fopen("/proc/filesystems", "r"); - if (!f) - return v; - - while (getline(&line, &len, f) != -1) { - if (strcmp(line, "nodev\toverlay\n") == 0) { - v = "overlay"; - break; - } - } - - fclose(f); - free(line); - return v; -} - -// -// XXXXXXX plain directory bind mount ops -// -static int overlayfs_mount(struct bdev *bdev) -{ - char *options, *dup, *lower, *upper; - char *options_work, *work, *lastslash; - int lastslashidx; - int len, len2; - unsigned long mntflags; - char *mntdata; - int ret, ret2; - - if (strcmp(bdev->type, "overlayfs")) - return -22; - if (!bdev->src || !bdev->dest) - return -22; - - if (!overlayfs_name) - overlayfs_name = detect_overlayfs_name(); - - // separately mount it first - // mount -t overlayfs -oupperdir=${upper},lowerdir=${lower} lower dest - dup = alloca(strlen(bdev->src)+1); - strcpy(dup, bdev->src); - if (!(lower = strchr(dup, 
':'))) - return -22; - if (!(upper = strchr(++lower, ':'))) - return -22; - *upper = '\0'; - upper++; - - // if delta doesn't yet exist, create it - if (mkdir_p(upper, 0755) < 0 && errno != EEXIST) - return -22; - - // overlayfs.v22 or higher needs workdir option - // if upper is /var/lib/lxc/c2/delta0, - // then workdir is /var/lib/lxc/c2/olwork - lastslash = strrchr(upper, '/'); - if (!lastslash) - return -22; - lastslash++; - lastslashidx = lastslash - upper; - - work = alloca(lastslashidx + 7); - strncpy(work, upper, lastslashidx+7); - strcpy(work+lastslashidx, "olwork"); - - if (parse_mntopts(bdev->mntopts, &mntflags, &mntdata) < 0) { - free(mntdata); - return -22; - } - - if (mkdir_p(work, 0755) < 0 && errno != EEXIST) { - free(mntdata); - return -22; - } - - // TODO We should check whether bdev->src is a blockdev, and if so - // but for now, only support overlays of a basic directory - - if (mntdata) { - len = strlen(lower) + strlen(upper) + strlen("upperdir=,lowerdir=,") + strlen(mntdata) + 1; - options = alloca(len); - ret = snprintf(options, len, "upperdir=%s,lowerdir=%s,%s", upper, lower, mntdata); - - len2 = strlen(lower) + strlen(upper) + strlen(work) - + strlen("upperdir=,lowerdir=,workdir=") + strlen(mntdata) + 1; - options_work = alloca(len2); - ret2 = snprintf(options, len2, "upperdir=%s,lowerdir=%s,workdir=%s,%s", - upper, lower, work, mntdata); - } - else { - len = strlen(lower) + strlen(upper) + strlen("upperdir=,lowerdir=") + 1; - options = alloca(len); - ret = snprintf(options, len, "upperdir=%s,lowerdir=%s", upper, lower); - - len2 = strlen(lower) + strlen(upper) + strlen(work) - + strlen("upperdir=,lowerdir=,workdir=") + 1; - options_work = alloca(len2); - ret2 = snprintf(options_work, len2, "upperdir=%s,lowerdir=%s,workdir=%s", - upper, lower, work); - } - if (ret < 0 || ret >= len || ret2 < 0 || ret2 >= len2) { - free(mntdata); - return -1; - } - - // mount without workdir option for overlayfs before v21 - ret = mount(lower, bdev->dest, 
overlayfs_name, MS_MGC_VAL | mntflags, options); - if (ret < 0) { - INFO("overlayfs: error mounting %s onto %s options %s. retry with workdir", - lower, bdev->dest, options); - - // retry with workdir option for overlayfs v22 and higher - ret = mount(lower, bdev->dest, overlayfs_name, MS_MGC_VAL | mntflags, options_work); - if (ret < 0) - SYSERROR("overlayfs: error mounting %s onto %s options %s", - lower, bdev->dest, options_work); - else - INFO("overlayfs: mounted %s onto %s options %s", - lower, bdev->dest, options_work); - } - else - INFO("overlayfs: mounted %s onto %s options %s", - lower, bdev->dest, options); - return ret; -} - -static int overlayfs_umount(struct bdev *bdev) -{ - if (strcmp(bdev->type, "overlayfs")) - return -22; - if (!bdev->src || !bdev->dest) - return -22; - return umount(bdev->dest); -} - static int rsync_delta(struct rsync_data_char *data) { if (setgid(0) < 0) { @@ -2499,296 +2348,7 @@ static int rsync_delta_wrapper(void *data) return rsync_delta(arg); } -static int ovl_rsync(struct ovl_rsync_data *data) -{ - int ret; - - if (setgid(0) < 0) { - ERROR("Failed to setgid to 0"); - return -1; - } - if (setgroups(0, NULL) < 0) - WARN("Failed to clear groups"); - if (setuid(0) < 0) { - ERROR("Failed to setuid to 0"); - return -1; - } - - if (unshare(CLONE_NEWNS) < 0) { - SYSERROR("Unable to unshare mounts ns"); - return -1; - } - if (detect_shared_rootfs()) { - if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL)) { - SYSERROR("Failed to make / rslave"); - ERROR("Continuing..."); - } - } - if (overlayfs_mount(data->orig) < 0) { - ERROR("Failed mounting original container fs"); - return -1; - } - if (overlayfs_mount(data->new) < 0) { - ERROR("Failed mounting new container fs"); - return -1; - } - ret = do_rsync(data->orig->dest, data->new->dest); - - overlayfs_umount(data->new); - overlayfs_umount(data->orig); - - if (ret < 0) { - ERROR("rsyncing %s to %s", data->orig->dest, data->new->dest); - return -1; - } - - return 0; -} - -static int 
ovl_rsync_wrapper(void *data) -{ - struct ovl_rsync_data *arg = data; - return ovl_rsync(arg); -} - -static int ovl_do_rsync(struct bdev *orig, struct bdev *new, struct lxc_conf *conf) -{ - int ret = -1; - struct ovl_rsync_data rdata; - - rdata.orig = orig; - rdata.new = new; - if (am_unpriv()) - ret = userns_exec_1(conf, ovl_rsync_wrapper, &rdata); - else - ret = ovl_rsync(&rdata); - if (ret) - ERROR("copying overlayfs delta"); - - return ret; -} - -static int overlayfs_clonepaths(struct bdev *orig, struct bdev *new, const char *oldname, - const char *cname, const char *oldpath, const char *lxcpath, int snap, - uint64_t newsize, struct lxc_conf *conf) -{ - if (!snap) { - ERROR("overlayfs is only for snapshot clones"); - return -22; - } - - if (!orig->src || !orig->dest) - return -1; - - new->dest = dir_new_path(orig->dest, oldname, cname, oldpath, lxcpath); - if (!new->dest) - return -1; - if (mkdir_p(new->dest, 0755) < 0) - return -1; - - if (am_unpriv() && chown_mapped_root(new->dest, conf) < 0) - WARN("Failed to update ownership of %s", new->dest); - - if (strcmp(orig->type, "dir") == 0) { - char *delta, *lastslash; - char *work; - int ret, len, lastslashidx; - - // if we have /var/lib/lxc/c2/rootfs, then delta will be - // /var/lib/lxc/c2/delta0 - lastslash = strrchr(new->dest, '/'); - if (!lastslash) - return -22; - if (strlen(lastslash) < 7) - return -22; - lastslash++; - lastslashidx = lastslash - new->dest; - - delta = malloc(lastslashidx + 7); - if (!delta) - return -1; - strncpy(delta, new->dest, lastslashidx+1); - strcpy(delta+lastslashidx, "delta0"); - if ((ret = mkdir(delta, 0755)) < 0) { - SYSERROR("error: mkdir %s", delta); - free(delta); - return -1; - } - if (am_unpriv() && chown_mapped_root(delta, conf) < 0) - WARN("Failed to update ownership of %s", delta); - - // make workdir for overlayfs.v22 or higher - // workdir is /var/lib/lxc/c2/olwork - // it is used to prepare files before atomically swithing with destination, - // and needs to be on 
the same filesystem as upperdir, - // so it's OK for it to be empty. - work = malloc(lastslashidx + 7); - if (!work) { - free(delta); - return -1; - } - strncpy(work, new->dest, lastslashidx+1); - strcpy(work+lastslashidx, "olwork"); - if (mkdir(work, 0755) < 0) { - SYSERROR("error: mkdir %s", work); - free(delta); - free(work); - return -1; - } - if (am_unpriv() && chown_mapped_root(work, conf) < 0) - WARN("Failed to update ownership of %s", work); - free(work); - - // the src will be 'overlayfs:lowerdir:upperdir' - len = strlen(delta) + strlen(orig->src) + 12; - new->src = malloc(len); - if (!new->src) { - free(delta); - return -ENOMEM; - } - ret = snprintf(new->src, len, "overlayfs:%s:%s", orig->src, delta); - free(delta); - if (ret < 0 || ret >= len) - return -ENOMEM; - } else if (strcmp(orig->type, "overlayfs") == 0) { - // What exactly do we want to do here? - // I think we want to use the original lowerdir, with a - // private delta which is originally rsynced from the - // original delta - char *osrc, *odelta, *nsrc, *ndelta, *work; - char *lastslash; - int len, ret, lastslashidx; - if (!(osrc = strdup(orig->src))) - return -22; - nsrc = strchr(osrc, ':') + 1; - if (nsrc != osrc + 10 || (odelta = strchr(nsrc, ':')) == NULL) { - free(osrc); - return -22; - } - *odelta = '\0'; - odelta++; - ndelta = dir_new_path(odelta, oldname, cname, oldpath, lxcpath); - if (!ndelta) { - free(osrc); - return -ENOMEM; - } - if ((ret = mkdir(ndelta, 0755)) < 0 && errno != EEXIST) { - SYSERROR("error: mkdir %s", ndelta); - free(osrc); - free(ndelta); - return -1; - } - if (am_unpriv() && chown_mapped_root(ndelta, conf) < 0) - WARN("Failed to update ownership of %s", ndelta); - - // make workdir for overlayfs.v22 or higher - // for details, see above. 
- lastslash = strrchr(ndelta, '/'); - if (!lastslash) - return -1; - lastslash++; - lastslashidx = lastslash - ndelta; - - work = malloc(lastslashidx + 7); - if (!work) - return -1; - strncpy(work, ndelta, lastslashidx+1); - strcpy(work+lastslashidx, "olwork"); - if ((mkdir(work, 0755) < 0) && errno != EEXIST) { - SYSERROR("error: mkdir %s", work); - free(work); - return -1; - } - if (am_unpriv() && chown_mapped_root(work, conf) < 0) - WARN("Failed to update ownership of %s", work); - free(work); - - len = strlen(nsrc) + strlen(ndelta) + 12; - new->src = malloc(len); - if (!new->src) { - free(osrc); - free(ndelta); - return -ENOMEM; - } - ret = snprintf(new->src, len, "overlayfs:%s:%s", nsrc, ndelta); - free(osrc); - free(ndelta); - if (ret < 0 || ret >= len) - return -ENOMEM; - - return ovl_do_rsync(orig, new, conf); - } else { - ERROR("overlayfs clone of %s container is not yet supported", - orig->type); - // Note, supporting this will require overlayfs_mount supporting - // mounting of the underlay. No big deal, just needs to be done. 
- return -1; - } - - return 0; -} - -static int overlayfs_destroy(struct bdev *orig) -{ - char *upper; - - if (strncmp(orig->src, "overlayfs:", 10) != 0) - return -22; - upper = strchr(orig->src + 10, ':'); - if (!upper) - return -22; - upper++; - return lxc_rmdir_onedev(upper, NULL); -} - -/* - * to say 'lxc-create -t ubuntu -n o1 -B overlayfs' means you want - * $lxcpath/$lxcname/rootfs to have the created container, while all - * changes after starting the container are written to - * $lxcpath/$lxcname/delta0 - */ -static int overlayfs_create(struct bdev *bdev, const char *dest, const char *n, - struct bdev_specs *specs) -{ - char *delta; - int ret, len = strlen(dest), newlen; - - if (len < 8 || strcmp(dest+len-7, "/rootfs") != 0) - return -1; - - if (!(bdev->dest = strdup(dest))) { - ERROR("Out of memory"); - return -1; - } - - delta = alloca(strlen(dest)+1); - strcpy(delta, dest); - strcpy(delta+len-6, "delta0"); - - if (mkdir_p(delta, 0755) < 0) { - ERROR("Error creating %s", delta); - return -1; - } - - /* overlayfs:lower:upper */ - newlen = (2 * len) + strlen("overlayfs:") + 2; - bdev->src = malloc(newlen); - if (!bdev->src) { - ERROR("Out of memory"); - return -1; - } - ret = snprintf(bdev->src, newlen, "overlayfs:%s:%s", dest, delta); - if (ret < 0 || ret >= newlen) - return -1; - - if (mkdir_p(bdev->dest, 0755) < 0) { - ERROR("Error creating %s", bdev->dest); - return -1; - } - - return 0; -} - +/* overlay */ static const struct bdev_ops overlayfs_ops = { .detect = &overlayfs_detect, .mount = &overlayfs_mount, @@ -3818,14 +3378,6 @@ struct bdev *bdev_create(const char *dest, const char *type, return do_bdev_create(dest, type, cname, specs); } -char *overlay_getlower(char *p) -{ - char *p1 = strchr(p, ':'); - if (p1) - *p1 = '\0'; - return p; -} - bool rootfs_is_blockdev(struct lxc_conf *conf) { const struct bdev_type *q; diff --git a/src/lxc/bdev.h b/src/lxc/bdev/bdev.h similarity index 99% rename from src/lxc/bdev.h rename to src/lxc/bdev/bdev.h index 
f7b3fcae0..2e564aa6d 100644 --- a/src/lxc/bdev.h +++ b/src/lxc/bdev/bdev.h @@ -27,11 +27,11 @@ * aufs, dir, raw, btrfs, overlayfs, aufs, lvm, loop, zfs, nbd (qcow2, raw, vdi, qed) */ -#include "config.h" #include #include #include +#include "config.h" /* define constants if the kernel/glibc headers don't define them */ #ifndef MS_DIRSYNC @@ -97,8 +97,6 @@ struct bdev { int nbd_idx; }; -char *overlay_getlower(char *p); - bool bdev_is_dir(struct lxc_conf *conf, const char *path); bool bdev_can_backup(struct lxc_conf *conf); diff --git a/src/lxc/lxc-btrfs.h b/src/lxc/bdev/lxc-btrfs.h similarity index 100% rename from src/lxc/lxc-btrfs.h rename to src/lxc/bdev/lxc-btrfs.h diff --git a/src/lxc/bdev/overlay.c b/src/lxc/bdev/overlay.c new file mode 100644 index 000000000..3b5c8268b --- /dev/null +++ b/src/lxc/bdev/overlay.c @@ -0,0 +1,594 @@ +/* + * lxc: linux Container library + * + * (C) Copyright IBM Corp. 2007, 2008 + * + * Authors: + * Daniel Lezcano + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define _GNU_SOURCE +#include +#include +#include +#include + +#include "bdev.h" +#include "conf.h" +#include "confile.h" +#include "log.h" +#include "lxccontainer.h" +#include "overlay.h" +#include "utils.h" + +lxc_log_define(overlay, lxc); + +struct ovl_rsync_data { + struct bdev *orig; + struct bdev *new; +}; + +/* defined in lxccontainer.c: needs to become common helper */ +extern int do_rsync(const char *src, const char *dest); + +/* defined in lxccontainer.c: needs to become common helper */ +extern char *dir_new_path(char *src, const char *oldname, const char *name, + const char *oldpath, const char *lxcpath); + +char *overlay_getlower(char *p) +{ + char *p1 = strchr(p, ':'); + if (p1) + *p1 = '\0'; + return p; +} + +int overlayfs_detect(const char *path) +{ + if (strncmp(path, "overlayfs:", 10) == 0) + return 1; // take their word for it + return 0; +} + +static char *overlayfs_name; +static char *detect_overlayfs_name(void) +{ + char *v = "overlayfs"; + char *line = NULL; + size_t len = 0; + FILE *f = fopen("/proc/filesystems", "r"); + if (!f) + return v; + + while (getline(&line, &len, f) != -1) { + if (strcmp(line, "nodev\toverlay\n") == 0) { + v = "overlay"; + break; + } + } + + fclose(f); + free(line); + return v; +} + +/* XXXXXXX plain directory bind mount ops */ +int overlayfs_mount(struct bdev *bdev) +{ + char *options, *dup, *lower, *upper; + char *options_work, *work, *lastslash; + int lastslashidx; + int len, len2; + unsigned long mntflags; + char *mntdata; + int ret, ret2; + + if (strcmp(bdev->type, "overlayfs")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + + // defined in bdev.c + if (!overlayfs_name) + overlayfs_name = detect_overlayfs_name(); + + // separately mount it first + // mount -t 
overlayfs -oupperdir=${upper},lowerdir=${lower} lower dest + dup = alloca(strlen(bdev->src)+1); + strcpy(dup, bdev->src); + if (!(lower = strchr(dup, ':'))) + return -22; + if (!(upper = strchr(++lower, ':'))) + return -22; + *upper = '\0'; + upper++; + + // if delta doesn't yet exist, create it + if (mkdir_p(upper, 0755) < 0 && errno != EEXIST) + return -22; + + // overlayfs.v22 or higher needs workdir option + // if upper is /var/lib/lxc/c2/delta0, + // then workdir is /var/lib/lxc/c2/olwork + lastslash = strrchr(upper, '/'); + if (!lastslash) + return -22; + lastslash++; + lastslashidx = lastslash - upper; + + work = alloca(lastslashidx + 7); + strncpy(work, upper, lastslashidx+7); + strcpy(work+lastslashidx, "olwork"); + + if (parse_mntopts(bdev->mntopts, &mntflags, &mntdata) < 0) { + free(mntdata); + return -22; + } + + if (mkdir_p(work, 0755) < 0 && errno != EEXIST) { + free(mntdata); + return -22; + } + + // TODO We should check whether bdev->src is a blockdev, and if so + // but for now, only support overlays of a basic directory + + if (mntdata) { + len = strlen(lower) + strlen(upper) + strlen("upperdir=,lowerdir=,") + strlen(mntdata) + 1; + options = alloca(len); + ret = snprintf(options, len, "upperdir=%s,lowerdir=%s,%s", upper, lower, mntdata); + + len2 = strlen(lower) + strlen(upper) + strlen(work) + + strlen("upperdir=,lowerdir=,workdir=") + strlen(mntdata) + 1; + options_work = alloca(len2); + ret2 = snprintf(options, len2, "upperdir=%s,lowerdir=%s,workdir=%s,%s", + upper, lower, work, mntdata); + } + else { + len = strlen(lower) + strlen(upper) + strlen("upperdir=,lowerdir=") + 1; + options = alloca(len); + ret = snprintf(options, len, "upperdir=%s,lowerdir=%s", upper, lower); + + len2 = strlen(lower) + strlen(upper) + strlen(work) + + strlen("upperdir=,lowerdir=,workdir=") + 1; + options_work = alloca(len2); + ret2 = snprintf(options_work, len2, "upperdir=%s,lowerdir=%s,workdir=%s", + upper, lower, work); + } + if (ret < 0 || ret >= len || ret2 < 0 
|| ret2 >= len2) { + free(mntdata); + return -1; + } + + // mount without workdir option for overlayfs before v21 + ret = mount(lower, bdev->dest, overlayfs_name, MS_MGC_VAL | mntflags, options); + if (ret < 0) { + INFO("overlayfs: error mounting %s onto %s options %s. retry with workdir", + lower, bdev->dest, options); + + // retry with workdir option for overlayfs v22 and higher + ret = mount(lower, bdev->dest, overlayfs_name, MS_MGC_VAL | mntflags, options_work); + if (ret < 0) + SYSERROR("overlayfs: error mounting %s onto %s options %s", + lower, bdev->dest, options_work); + else + INFO("overlayfs: mounted %s onto %s options %s", + lower, bdev->dest, options_work); + } + else + INFO("overlayfs: mounted %s onto %s options %s", + lower, bdev->dest, options); + return ret; +} + +int overlayfs_umount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "overlayfs")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + return umount(bdev->dest); +} + +static int ovl_rsync(struct ovl_rsync_data *data) +{ + int ret; + + if (setgid(0) < 0) { + ERROR("Failed to setgid to 0"); + return -1; + } + if (setgroups(0, NULL) < 0) + WARN("Failed to clear groups"); + if (setuid(0) < 0) { + ERROR("Failed to setuid to 0"); + return -1; + } + + if (unshare(CLONE_NEWNS) < 0) { + SYSERROR("Unable to unshare mounts ns"); + return -1; + } + if (detect_shared_rootfs()) { + if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL)) { + SYSERROR("Failed to make / rslave"); + ERROR("Continuing..."); + } + } + if (overlayfs_mount(data->orig) < 0) { + ERROR("Failed mounting original container fs"); + return -1; + } + if (overlayfs_mount(data->new) < 0) { + ERROR("Failed mounting new container fs"); + return -1; + } + ret = do_rsync(data->orig->dest, data->new->dest); + + overlayfs_umount(data->new); + overlayfs_umount(data->orig); + + if (ret < 0) { + ERROR("rsyncing %s to %s", data->orig->dest, data->new->dest); + return -1; + } + + return 0; +} + +static int ovl_rsync_wrapper(void *data) +{ + 
struct ovl_rsync_data *arg = data; + return ovl_rsync(arg); +} + +static int ovl_do_rsync(struct bdev *orig, struct bdev *new, struct lxc_conf *conf) +{ + int ret = -1; + struct ovl_rsync_data rdata; + + rdata.orig = orig; + rdata.new = new; + if (am_unpriv()) + ret = userns_exec_1(conf, ovl_rsync_wrapper, &rdata); + else + ret = ovl_rsync(&rdata); + if (ret) + ERROR("copying overlayfs delta"); + + return ret; +} + +int overlayfs_clonepaths(struct bdev *orig, struct bdev *new, const char *oldname, + const char *cname, const char *oldpath, const char *lxcpath, int snap, + uint64_t newsize, struct lxc_conf *conf) +{ + if (!snap) { + ERROR("overlayfs is only for snapshot clones"); + return -22; + } + + if (!orig->src || !orig->dest) + return -1; + + new->dest = dir_new_path(orig->dest, oldname, cname, oldpath, lxcpath); + if (!new->dest) + return -1; + if (mkdir_p(new->dest, 0755) < 0) + return -1; + + if (am_unpriv() && chown_mapped_root(new->dest, conf) < 0) + WARN("Failed to update ownership of %s", new->dest); + + if (strcmp(orig->type, "dir") == 0) { + char *delta, *lastslash; + char *work; + int ret, len, lastslashidx; + + // if we have /var/lib/lxc/c2/rootfs, then delta will be + // /var/lib/lxc/c2/delta0 + lastslash = strrchr(new->dest, '/'); + if (!lastslash) + return -22; + if (strlen(lastslash) < 7) + return -22; + lastslash++; + lastslashidx = lastslash - new->dest; + + delta = malloc(lastslashidx + 7); + if (!delta) + return -1; + strncpy(delta, new->dest, lastslashidx+1); + strcpy(delta+lastslashidx, "delta0"); + if ((ret = mkdir(delta, 0755)) < 0) { + SYSERROR("error: mkdir %s", delta); + free(delta); + return -1; + } + if (am_unpriv() && chown_mapped_root(delta, conf) < 0) + WARN("Failed to update ownership of %s", delta); + + // make workdir for overlayfs.v22 or higher + // workdir is /var/lib/lxc/c2/olwork + // it is used to prepare files before atomically swithing with destination, + // and needs to be on the same filesystem as upperdir, + // so 
it's OK for it to be empty. + work = malloc(lastslashidx + 7); + if (!work) { + free(delta); + return -1; + } + strncpy(work, new->dest, lastslashidx+1); + strcpy(work+lastslashidx, "olwork"); + if (mkdir(work, 0755) < 0) { + SYSERROR("error: mkdir %s", work); + free(delta); + free(work); + return -1; + } + if (am_unpriv() && chown_mapped_root(work, conf) < 0) + WARN("Failed to update ownership of %s", work); + free(work); + + // the src will be 'overlayfs:lowerdir:upperdir' + len = strlen(delta) + strlen(orig->src) + 12; + new->src = malloc(len); + if (!new->src) { + free(delta); + return -ENOMEM; + } + ret = snprintf(new->src, len, "overlayfs:%s:%s", orig->src, delta); + free(delta); + if (ret < 0 || ret >= len) + return -ENOMEM; + } else if (strcmp(orig->type, "overlayfs") == 0) { + // What exactly do we want to do here? + // I think we want to use the original lowerdir, with a + // private delta which is originally rsynced from the + // original delta + char *osrc, *odelta, *nsrc, *ndelta, *work; + char *lastslash; + int len, ret, lastslashidx; + if (!(osrc = strdup(orig->src))) + return -22; + nsrc = strchr(osrc, ':') + 1; + if (nsrc != osrc + 10 || (odelta = strchr(nsrc, ':')) == NULL) { + free(osrc); + return -22; + } + *odelta = '\0'; + odelta++; + ndelta = dir_new_path(odelta, oldname, cname, oldpath, lxcpath); + if (!ndelta) { + free(osrc); + return -ENOMEM; + } + if ((ret = mkdir(ndelta, 0755)) < 0 && errno != EEXIST) { + SYSERROR("error: mkdir %s", ndelta); + free(osrc); + free(ndelta); + return -1; + } + if (am_unpriv() && chown_mapped_root(ndelta, conf) < 0) + WARN("Failed to update ownership of %s", ndelta); + + // make workdir for overlayfs.v22 or higher + // for details, see above. 
+ lastslash = strrchr(ndelta, '/'); + if (!lastslash) + return -1; + lastslash++; + lastslashidx = lastslash - ndelta; + + work = malloc(lastslashidx + 7); + if (!work) + return -1; + strncpy(work, ndelta, lastslashidx+1); + strcpy(work+lastslashidx, "olwork"); + if ((mkdir(work, 0755) < 0) && errno != EEXIST) { + SYSERROR("error: mkdir %s", work); + free(work); + return -1; + } + if (am_unpriv() && chown_mapped_root(work, conf) < 0) + WARN("Failed to update ownership of %s", work); + free(work); + + len = strlen(nsrc) + strlen(ndelta) + 12; + new->src = malloc(len); + if (!new->src) { + free(osrc); + free(ndelta); + return -ENOMEM; + } + ret = snprintf(new->src, len, "overlayfs:%s:%s", nsrc, ndelta); + free(osrc); + free(ndelta); + if (ret < 0 || ret >= len) + return -ENOMEM; + + return ovl_do_rsync(orig, new, conf); + } else { + ERROR("overlayfs clone of %s container is not yet supported", + orig->type); + // Note, supporting this will require overlayfs_mount supporting + // mounting of the underlay. No big deal, just needs to be done. 
+ return -1; + } + + return 0; +} + +int overlayfs_destroy(struct bdev *orig) +{ + char *upper; + + if (strncmp(orig->src, "overlayfs:", 10) != 0) + return -22; + upper = strchr(orig->src + 10, ':'); + if (!upper) + return -22; + upper++; + return lxc_rmdir_onedev(upper, NULL); +} + +/* + * to say 'lxc-create -t ubuntu -n o1 -B overlayfs' means you want + * $lxcpath/$lxcname/rootfs to have the created container, while all + * changes after starting the container are written to + * $lxcpath/$lxcname/delta0 + */ +int overlayfs_create(struct bdev *bdev, const char *dest, const char *n, + struct bdev_specs *specs) +{ + char *delta; + int ret, len = strlen(dest), newlen; + + if (len < 8 || strcmp(dest+len-7, "/rootfs") != 0) + return -1; + + if (!(bdev->dest = strdup(dest))) { + ERROR("Out of memory"); + return -1; + } + + delta = alloca(strlen(dest)+1); + strcpy(delta, dest); + strcpy(delta+len-6, "delta0"); + + if (mkdir_p(delta, 0755) < 0) { + ERROR("Error creating %s", delta); + return -1; + } + + /* overlayfs:lower:upper */ + newlen = (2 * len) + strlen("overlayfs:") + 2; + bdev->src = malloc(newlen); + if (!bdev->src) { + ERROR("Out of memory"); + return -1; + } + ret = snprintf(bdev->src, newlen, "overlayfs:%s:%s", dest, delta); + if (ret < 0 || ret >= newlen) + return -1; + + if (mkdir_p(bdev->dest, 0755) < 0) { + ERROR("Error creating %s", bdev->dest); + return -1; + } + + return 0; +} + +/* + * To be called from lxcapi_clone() in lxccontainer.c: When we clone a container + * with overlay lxc.mount.entry entries we need to update absolute paths for + * upper- and workdir. This update is done in two locations: + * lxc_conf->unexpanded_config and lxc_conf->mount_list. Both updates are done + * independent of each other since lxc_conf->mountlist may container more mount + * entries (e.g. from other included files) than lxc_conf->unexpanded_config . 
+ *
+ * Returns 0 on success and -1 on failure: the copy of newpath cannot be
+ * allocated, clone_update_unexp_ovl_paths() reports failure, a rewritten
+ * option does not fit into MAXPATHLEN, or a replacement string cannot be
+ * built. Mount entries rewritten before a failure keep their new paths.
+ */
+int update_ovl_paths(struct lxc_conf *lxc_conf, const char *lxc_path,
+		     const char *lxc_name, const char *newpath,
+		     const char *newname)
+{
+	char new_upper[MAXPATHLEN];
+	char new_work[MAXPATHLEN];
+	char old_upper[MAXPATHLEN];
+	char old_work[MAXPATHLEN];
+	char *cleanpath = NULL;
+	size_t i;
+	int fret = -1;
+	int ret = 0;
+	struct lxc_list *iterator;
+	const char *ovl_dirs[] = {"br", "upperdir", "workdir"};
+
+	/* Work on a private copy so that trailing slashes can be stripped. */
+	cleanpath = strdup(newpath);
+	if (!cleanpath)
+		goto err;
+
+	remove_trailing_slashes(cleanpath);
+
+	/* We have to update lxc_conf->unexpanded_config separately from
+	 * lxc_conf->mount_list.
+	 * NOTE(review): newpath (not cleanpath) is passed here, unchanged from
+	 * the previous code; confirm that the helper copes with trailing
+	 * slashes on its own. */
+	for (i = 0; i < sizeof(ovl_dirs) / sizeof(ovl_dirs[0]); i++) {
+		if (!clone_update_unexp_ovl_paths(lxc_conf, lxc_path, newpath,
+						  lxc_name, newname,
+						  ovl_dirs[i]))
+			goto err;
+	}
+
+	/* The workdir= strings are the same for every entry: build them once. */
+	ret = snprintf(old_work, MAXPATHLEN, "workdir=%s/%s", lxc_path, lxc_name);
+	if (ret < 0 || ret >= MAXPATHLEN)
+		goto err;
+
+	ret = snprintf(new_work, MAXPATHLEN, "workdir=%s/%s", cleanpath, newname);
+	if (ret < 0 || ret >= MAXPATHLEN)
+		goto err;
+
+	lxc_list_for_each(iterator, &lxc_conf->mount_list) {
+		char *mnt_entry = iterator->elem;
+		char *updated = NULL;
+		const char *upper_key = NULL;
+
+		/* The option holding the path to rewrite is "upperdir" for
+		 * overlay entries and "br" for aufs entries. Everything else
+		 * is left alone. */
+		if (strstr(mnt_entry, "overlay"))
+			upper_key = "upperdir";
+		else if (strstr(mnt_entry, "aufs"))
+			upper_key = "br";
+
+		if (!upper_key)
+			continue;
+
+		ret = snprintf(old_upper, MAXPATHLEN, "%s=%s/%s", upper_key, lxc_path, lxc_name);
+		if (ret < 0 || ret >= MAXPATHLEN)
+			goto err;
+
+		ret = snprintf(new_upper, MAXPATHLEN, "%s=%s/%s", upper_key, cleanpath, newname);
+		if (ret < 0 || ret >= MAXPATHLEN)
+			goto err;
+
+		if (strstr(mnt_entry, old_upper)) {
+			updated = lxc_string_replace(old_upper, new_upper, mnt_entry);
+			/* A stale path would leave the clone pointing at the
+			 * original container's directory, so fail instead. */
+			if (!updated)
+				goto err;
+		}
+
+		if (strstr(mnt_entry, old_work)) {
+			char *with_work;
+
+			/* Build on the upperdir rewrite when there was one. */
+			with_work = lxc_string_replace(old_work, new_work,
+						       updated ? updated : mnt_entry);
+			free(updated);
+			if (!with_work)
+				goto err;
+			updated = with_work;
+		}
+
+		/* Hand the rewritten string over to the list. Copying it
+		 * again could fail and leave a NULL element behind. */
+		if (updated) {
+			free(iterator->elem);
+			iterator->elem = updated;
+		}
+	}
+
+	fret = 0;
+err:
+	free(cleanpath);
+	return fret;
+}
+
diff --git a/src/lxc/bdev/overlay.h b/src/lxc/bdev/overlay.h
new file mode 100644
index 000000000..360759b70
--- /dev/null
+++ b/src/lxc/bdev/overlay.h
@@ -0,0 +1,70 @@
+/*
+ * lxc: linux Container library
+ *
+ * (C) Copyright IBM Corp. 2007, 2008
+ *
+ * Authors:
+ * Daniel Lezcano
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef __LXC_OVERLAY_H
+#define __LXC_OVERLAY_H
+
+#include
+#include
+#include
+#include
+
+/* defined in bdev.h */
+struct bdev;
+
+/* defined in lxccontainer.h */
+struct bdev_specs;
+
+/* defined in conf.h */
+struct lxc_conf;
+
+int overlayfs_detect(const char *path);
+int overlayfs_mount(struct bdev *bdev);
+int overlayfs_umount(struct bdev *bdev);
+int overlayfs_clonepaths(struct bdev *orig, struct bdev *new,
+			 const char *oldname, const char *cname,
+			 const char *oldpath, const char *lxcpath, int snap,
+			 uint64_t newsize, struct lxc_conf *conf);
+int overlayfs_destroy(struct bdev *orig);
+int overlayfs_create(struct bdev *bdev, const char *dest, const char *n,
+		     struct bdev_specs *specs);
+
+/*
+ * To be called from lxcapi_clone() in lxccontainer.c: When we clone a container
+ * with overlay lxc.mount.entry entries we need to update absolute paths for
+ * upper- and workdir. This update is done in two locations:
+ * lxc_conf->unexpanded_config and lxc_conf->mount_list. Both updates are done
+ * independent of each other since lxc_conf->mount_list may contain more mount
+ * entries (e.g. from other included files) than lxc_conf->unexpanded_config.
+ */
+int update_ovl_paths(struct lxc_conf *lxc_conf, const char *lxc_path,
+		     const char *lxc_name, const char *newpath,
+		     const char *newname);
+
+/*
+ * To be called from functions in lxccontainer.c: Get lower directory for
+ * overlay rootfs.
+ */ +char *overlay_getlower(char *p); + +#endif /* __LXC_OVERLAY_H */ diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c index 1dacc1abd..97abced97 100644 --- a/src/lxc/lxccontainer.c +++ b/src/lxc/lxccontainer.c @@ -19,48 +19,46 @@ */ #define _GNU_SOURCE -#include #include -#include -#include -#include -#include -#include -#include +#include #include #include -#include -#include -#include -#include +#include #include +#include +#include +#include #include -#include #include +#include +#include +#include +#include #include +#include +#include -#include -#include -#include - -#include "config.h" -#include "lxc.h" -#include "state.h" +#include "attach.h" +#include "bdev/bdev.h" +#include "bdev/overlay.h" +#include "cgroup.h" #include "conf.h" +#include "config.h" +#include "commands.h" #include "confile.h" #include "console.h" -#include "cgroup.h" -#include "commands.h" #include "criu.h" #include "log.h" -#include "bdev.h" -#include "utils.h" -#include "attach.h" +#include "lxc.h" +#include "lxccontainer.h" +#include "lxclock.h" #include "monitor.h" #include "namespace.h" #include "network.h" -#include "lxclock.h" #include "sync.h" +#include "state.h" +#include "utils.h" +#include "version.h" #if HAVE_IFADDRS_H #include @@ -2997,102 +2995,6 @@ static int create_file_dirname(char *path, struct lxc_conf *conf) return ret; } -/* When we clone a container with overlay lxc.mount.entry entries we need to -* update absolute paths for upper- and workdir. This update is done in two -* locations: lxc_conf->unexpanded_config and lxc_conf->mount_list. Both updates -* are done independent of each other since lxc_conf->mountlist may container -* more mount entries (e.g. from other included files) than -* lxc_conf->unexpanded_config . 
*/ -static int update_ovl_paths(struct lxc_conf *lxc_conf, const char *lxc_path, - const char *lxc_name, const char *newpath, - const char *newname) -{ - char new_upper[MAXPATHLEN]; - char new_work[MAXPATHLEN]; - char old_upper[MAXPATHLEN]; - char old_work[MAXPATHLEN]; - char *cleanpath = NULL; - int i; - int fret = -1; - int ret = 0; - struct lxc_list *iterator; - const char *ovl_dirs[] = {"br", "upperdir", "workdir"}; - - cleanpath = strdup(newpath); - if (!cleanpath) - goto err; - - remove_trailing_slashes(cleanpath); - - /* We have to update lxc_conf->unexpanded_config separately from - * lxc_conf->mount_list. */ - for (i = 0; i < sizeof(ovl_dirs) / sizeof(ovl_dirs[0]); i++) { - if (!clone_update_unexp_ovl_paths(lxc_conf, lxc_path, newpath, - lxc_name, newname, - ovl_dirs[i])) - goto err; - } - - ret = snprintf(old_work, MAXPATHLEN, "workdir=%s/%s", lxc_path, lxc_name); - if (ret < 0 || ret >= MAXPATHLEN) - goto err; - - ret = snprintf(new_work, MAXPATHLEN, "workdir=%s/%s", cleanpath, newname); - if (ret < 0 || ret >= MAXPATHLEN) - goto err; - - lxc_list_for_each(iterator, &lxc_conf->mount_list) { - char *mnt_entry = NULL; - char *new_mnt_entry = NULL; - char *tmp = NULL; - char *tmp_mnt_entry = NULL; - mnt_entry = iterator->elem; - - if (strstr(mnt_entry, "overlay")) - tmp = "upperdir"; - else if (strstr(mnt_entry, "aufs")) - tmp = "br"; - - if (!tmp) - continue; - - ret = snprintf(old_upper, MAXPATHLEN, "%s=%s/%s", tmp, lxc_path, lxc_name); - if (ret < 0 || ret >= MAXPATHLEN) - goto err; - - ret = snprintf(new_upper, MAXPATHLEN, "%s=%s/%s", tmp, cleanpath, newname); - if (ret < 0 || ret >= MAXPATHLEN) - goto err; - - if (strstr(mnt_entry, old_upper)) { - tmp_mnt_entry = lxc_string_replace(old_upper, new_upper, mnt_entry); - } - - if (strstr(mnt_entry, old_work)) { - if (tmp_mnt_entry) - new_mnt_entry = lxc_string_replace(old_work, new_work, tmp_mnt_entry); - else - new_mnt_entry = lxc_string_replace(old_work, new_work, mnt_entry); - } - - if (new_mnt_entry) { 
- free(iterator->elem); - iterator->elem = strdup(new_mnt_entry); - } else if (tmp_mnt_entry) { - free(iterator->elem); - iterator->elem = strdup(tmp_mnt_entry); - } - - free(new_mnt_entry); - free(tmp_mnt_entry); - } - - fret = 0; -err: - free(cleanpath); - return fret; -} - static struct lxc_container *do_lxcapi_clone(struct lxc_container *c, const char *newname, const char *lxcpath, int flags, const char *bdevtype, const char *bdevdata, uint64_t newsize,