From: Laurent Barbe Date: Fri, 2 Oct 2015 10:45:14 +0000 (+0200) Subject: Add Ceph RBD backingstore X-Git-Tag: lxc-2.0.0.beta1~11^2~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7da812df93d392d06630d2f2d7e7c9eec3693149;p=thirdparty%2Flxc.git Add Ceph RBD backingstore With lxc-create, this will create, map and mount a Rados blockdevice. A valid ceph.conf and ceph.client.admin.keyring are needed in /etc/ceph/. RBD mapping is not managed on reboot. Signed-off-by: Laurent Barbe --- diff --git a/src/lxc/arguments.h b/src/lxc/arguments.h index c2619357b..38cb0daec 100644 --- a/src/lxc/arguments.h +++ b/src/lxc/arguments.h @@ -86,6 +86,7 @@ struct lxc_arguments { char *fstype; uint64_t fssize; char *lvname, *vgname, *thinpool; + char *rbdname, *rbdpool; char *zfsroot, *lowerdir, *dir; /* lxc-execute */ diff --git a/src/lxc/bdev.c b/src/lxc/bdev.c index 846fd82ce..c932d66a5 100644 --- a/src/lxc/bdev.c +++ b/src/lxc/bdev.c @@ -1181,6 +1181,168 @@ static const struct bdev_ops lvm_ops = { .can_backup = false, }; + +/* + * CEPH RBD ops + */ + +static int rbd_detect(const char *path) +{ + if ( memcmp(path, "/dev/rbd/", 9) == 0) + return 1; + return 0; +} + +static int rbd_mount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "rbd")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + + if ( !file_exists(bdev->src) ) { + // if blkdev does not exist it should be mapped, because it is not persistent on reboot + ERROR("Block device %s is not mapped.", bdev->src); + return -1; + } + + return mount_unknown_fs(bdev->src, bdev->dest, bdev->mntopts); +} + +static int rbd_umount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "rbd")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + return umount(bdev->dest); +} + +static int rbd_clonepaths(struct bdev *orig, struct bdev *new, const char *oldname, + const char *cname, const char *oldpath, const char *lxcpath, int snap, + uint64_t newsize, struct lxc_conf *conf) +{ + ERROR("rbd clonepaths 
not implemented"); + return -1; +} + +static int rbd_destroy(struct bdev *orig) +{ + pid_t pid; + char *rbdfullname; + + if ( file_exists(orig->src) ) { + if ((pid = fork()) < 0) + return -1; + if (!pid) { + execlp("rbd", "rbd", "unmap" , orig->src, NULL); + exit(1); + } + if (wait_for_pid(pid) < 0) + return -1; + } + + if ((pid = fork()) < 0) + return -1; + if (!pid) { + rbdfullname = alloca(strlen(orig->src) - 9); + strcpy( rbdfullname, &orig->src[9] ); + execlp("rbd", "rbd", "rm" , rbdfullname, NULL); + exit(1); + } + return wait_for_pid(pid); + +} + +static int rbd_create(struct bdev *bdev, const char *dest, const char *n, + struct bdev_specs *specs) +{ + const char *rbdpool, *rbdname = n, *fstype; + uint64_t size; + int ret, len; + char sz[24]; + pid_t pid; + + if (!specs) + return -1; + + rbdpool = specs->rbd.rbdpool; + if (!rbdpool) + rbdpool = lxc_global_config_value("lxc.bdev.rbd.rbdpool"); + + if (specs->rbd.rbdname) + rbdname = specs->rbd.rbdname; + + /* source device /dev/rbd/lxc/ctn */ + len = strlen(rbdpool) + strlen(rbdname) + 11; + bdev->src = malloc(len); + if (!bdev->src) + return -1; + + ret = snprintf(bdev->src, len, "/dev/rbd/%s/%s", rbdpool, rbdname); + if (ret < 0 || ret >= len) + return -1; + + // fssize is in bytes. 
+ size = specs->fssize; + if (!size) + size = DEFAULT_FS_SIZE; + + // in megabytes for rbd tool + ret = snprintf(sz, 24, "%"PRIu64, size / 1024 / 1024 ); + if (ret < 0 || ret >= 24) + exit(1); + + if ((pid = fork()) < 0) + return -1; + if (!pid) { + execlp("rbd", "rbd", "create" , "--pool", rbdpool, rbdname, "--size", sz, NULL); + exit(1); + } + if (wait_for_pid(pid) < 0) + return -1; + + if ((pid = fork()) < 0) + return -1; + if (!pid) { + execlp("rbd", "rbd", "map", "--pool", rbdpool, rbdname, NULL); + exit(1); + } + if (wait_for_pid(pid) < 0) + return -1; + + fstype = specs->fstype; + if (!fstype) + fstype = DEFAULT_FSTYPE; + + if (do_mkfs(bdev->src, fstype) < 0) { + ERROR("Error creating filesystem type %s on %s", fstype, + bdev->src); + return -1; + } + if (!(bdev->dest = strdup(dest))) + return -1; + + if (mkdir_p(bdev->dest, 0755) < 0) { + ERROR("Error creating %s", bdev->dest); + return -1; + } + + return 0; +} + +static const struct bdev_ops rbd_ops = { + .detect = &rbd_detect, + .mount = &rbd_mount, + .umount = &rbd_umount, + .clone_paths = &rbd_clonepaths, + .destroy = &rbd_destroy, + .create = &rbd_create, + .can_snapshot = false, + .can_backup = false, +}; + + /* * Return the full path of objid under dirid. Let's say dirid is * /lxc/c1/rootfs, and objid is /lxc/c1/rootfs/a/b/c. Then we will @@ -3237,6 +3399,7 @@ static const struct bdev_ops nbd_ops = { static const struct bdev_type bdevs[] = { {.name = "zfs", .ops = &zfs_ops,}, {.name = "lvm", .ops = &lvm_ops,}, + {.name = "rbd", .ops = &rbd_ops,}, {.name = "btrfs", .ops = &btrfs_ops,}, {.name = "dir", .ops = &dir_ops,}, {.name = "aufs", .ops = &aufs_ops,}, @@ -3596,6 +3759,7 @@ err: static struct bdev * do_bdev_create(const char *dest, const char *type, const char *cname, struct bdev_specs *specs) { + struct bdev *bdev = bdev_get(type); if (!bdev) { return NULL; @@ -3616,7 +3780,7 @@ static struct bdev * do_bdev_create(const char *dest, const char *type, * for use. 
Before completing, the caller will need to call the * umount operation and bdev_put(). * @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs) - * @type: the bdevtype (dir, btrfs, zfs, etc) + * @type: the bdevtype (dir, btrfs, zfs, rbd, etc) * @cname: the container name * @specs: details about the backing store to create, like fstype */ @@ -3624,7 +3788,7 @@ struct bdev *bdev_create(const char *dest, const char *type, const char *cname, struct bdev_specs *specs) { struct bdev *bdev; - char *best_options[] = {"btrfs", "zfs", "lvm", "dir", NULL}; + char *best_options[] = {"btrfs", "zfs", "lvm", "dir", "rbd", NULL}; if (!type) return do_bdev_create(dest, "dir", cname, specs); diff --git a/src/lxc/initutils.c b/src/lxc/initutils.c index dbb5d52f5..45df60f6c 100644 --- a/src/lxc/initutils.c +++ b/src/lxc/initutils.c @@ -87,6 +87,7 @@ const char *lxc_global_config_value(const char *option_name) { "lxc.bdev.lvm.vg", DEFAULT_VG }, { "lxc.bdev.lvm.thin_pool", DEFAULT_THIN_POOL }, { "lxc.bdev.zfs.root", DEFAULT_ZFSROOT }, + { "lxc.bdev.rbd.rbdpool", DEFAULT_RBDPOOL }, { "lxc.lxcpath", NULL }, { "lxc.default_config", NULL }, { "lxc.cgroup.pattern", NULL }, diff --git a/src/lxc/initutils.h b/src/lxc/initutils.h index b4f9e545e..c021fd617 100644 --- a/src/lxc/initutils.h +++ b/src/lxc/initutils.h @@ -42,6 +42,7 @@ #define DEFAULT_VG "lxc" #define DEFAULT_THIN_POOL "lxc" #define DEFAULT_ZFSROOT "lxc" +#define DEFAULT_RBDPOOL "lxc" extern void lxc_setup_fs(void); extern const char *lxc_global_config_value(const char *option_name); diff --git a/src/lxc/lxc_create.c b/src/lxc/lxc_create.c index f1094fba8..fcbac6eda 100644 --- a/src/lxc/lxc_create.c +++ b/src/lxc/lxc_create.c @@ -81,6 +81,8 @@ static int my_parser(struct lxc_arguments* args, int c, char* arg) case '4': args->fssize = get_fssize(arg); break; case '5': args->zfsroot = arg; break; case '6': args->dir = arg; break; + case '7': args->rbdname = arg; break; + case '8': args->rbdpool = arg; break; } return 0; } @@ -96,6 
+98,8 @@ static const struct option my_longopts[] = { {"fssize", required_argument, 0, '4'}, {"zfsroot", required_argument, 0, '5'}, {"dir", required_argument, 0, '6'}, + {"rbdname", required_argument, 0, '7'}, + {"rbdpool", required_argument, 0, '8'}, LXC_COMMON_OPTIONS }; @@ -142,6 +146,10 @@ Options :\n\ (Default: lxc)\n\ --thinpool=TP Use LVM thin pool called TP\n\ (Default: lxc)\n\ + --rbdname=RBDNAME Use Ceph RBD name RBDNAME\n\ + (Default: container name)\n\ + --rbdpool=POOL Use Ceph RBD pool name POOL\n\ + (Default: lxc)\n\ --fstype=TYPE Create fstype TYPE\n\ (Default: ext3)\n\ --fssize=SIZE[U] Create filesystem of size SIZE * unit U (bBkKmMgGtT)\n\ @@ -159,7 +167,8 @@ static bool validate_bdev_args(struct lxc_arguments *a) if (strcmp(a->bdevtype, "best") != 0) { if (a->fstype || a->fssize) { if (strcmp(a->bdevtype, "lvm") != 0 && - strcmp(a->bdevtype, "loop") != 0) { + strcmp(a->bdevtype, "loop") != 0 && + strcmp(a->bdevtype, "rbd") != 0) { fprintf(stderr, "filesystem type and size are only valid with block devices\n"); return false; } @@ -170,6 +179,12 @@ static bool validate_bdev_args(struct lxc_arguments *a) return false; } } + if (strcmp(a->bdevtype, "rbd") != 0) { + if (a->rbdname || a->rbdpool) { + fprintf(stderr, "--rbdname and --rbdpool are only valid with -B rbd\n"); + return false; + } + } if (strcmp(a->bdevtype, "zfs") != 0) { if (a->zfsroot) { fprintf(stderr, "zfsroot is only valid with -B zfs\n"); @@ -262,6 +277,12 @@ int main(int argc, char *argv[]) if (my_args.thinpool) spec.lvm.thinpool = my_args.thinpool; } + if (strcmp(my_args.bdevtype, "rbd") == 0 || strcmp(my_args.bdevtype, "best") == 0) { + if (my_args.rbdname) + spec.rbd.rbdname = my_args.rbdname; + if (my_args.rbdpool) + spec.rbd.rbdpool = my_args.rbdpool; + } if (my_args.dir) { spec.dir = my_args.dir; } diff --git a/src/lxc/lxccontainer.h b/src/lxc/lxccontainer.h index e909d81ca..134477ce7 100644 --- a/src/lxc/lxccontainer.h +++ b/src/lxc/lxccontainer.h @@ -857,6 +857,10 @@ struct 
bdev_specs { char *lv; /*!< LVM Logical Volume name */ char *thinpool; /*!< LVM thin pool to use, if any */ } lvm; + struct { + char *rbdname; /*!< RBD image name */ + char *rbdpool; /*!< Ceph pool name */ + } rbd; char *dir; /*!< Directory path */ };