From 111d01fcc76d2e7d0b05f78fae67e89cdf6856ad Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Thu, 15 May 2008 16:48:12 +1000 Subject: [PATCH 1/1] Change write_init_super to be called only once. The current model for creating arrays involves writing a superblock to each device in the array. With containers (as with DDF), that model doesn't work. Every device in the container may need to be updated for an array made from just some of the devices in a container. So instead of calling write_init_super for each device, we call it once for the array and have it iterate over all the devices in the array. To help with this, ->add_to_super now passes in an 'fd' and name for the device. These get saved for use by write_init_super. So add_to_super takes ownership of the fd, and write_init_super will close it. This information is stored in the new 'info' field of supertype. As part of this, write_init_super now removes any old traces of raid metadata rather than doing this in common code. --- Create.c | 41 +++++----- Kill.c | 4 +- Manage.c | 9 ++- mdadm.c | 3 +- mdadm.h | 9 ++- super0.c | 65 +++++++++++----- super1.c | 225 +++++++++++++++++++++++++++++++------------------------ 7 files changed, 209 insertions(+), 147 deletions(-) diff --git a/Create.c b/Create.c index 629d359c..19793fa4 100644 --- a/Create.c +++ b/Create.c @@ -547,33 +547,28 @@ int Create(struct supertype *st, char *mddev, int mdfd, info.disk.state |= (1<devname, "missing")==0) { - info.disk.major = 0; - info.disk.minor = 0; - info.disk.state = (1<devname, O_RDONLY|O_EXCL, 0); - if (fd < 0) { - fprintf(stderr, Name ": failed to open %s after earlier success - aborting\n", - dv->devname); - return 1; - } - fstat(fd, &stb); - info.disk.major = major(stb.st_rdev); - info.disk.minor = minor(stb.st_rdev); - remove_partitions(fd); - close(fd); + strcasecmp(dv->devname, "missing")==0) + continue; + + fd = open(dv->devname, O_RDWR|O_EXCL, 0); + if (fd < 0) { + fprintf(stderr, Name ": failed to open %s " + "after earlier 
success - aborting\n", + dv->devname); + return 1; } + fstat(fd, &stb); + info.disk.major = major(stb.st_rdev); + info.disk.minor = minor(stb.st_rdev); + switch(pass){ case 1: - st->ss->add_to_super(st, &info.disk); + remove_partitions(fd); + st->ss->add_to_super(st, &info.disk, + fd, dv->devname); break; case 2: - if (info.disk.state == 1) break; - Kill(dv->devname, 0, 1); /* Just be sure it is clean */ - Kill(dv->devname, 0, 1); /* and again, there could be two superblocks */ - st->ss->write_init_super(st, &info.disk, - dv->devname); + close(fd); if (ioctl(mdfd, ADD_NEW_DISK, &info.disk)) { fprintf(stderr, Name ": ADD_NEW_DISK for %s failed: %s\n", @@ -586,6 +581,8 @@ int Create(struct supertype *st, char *mddev, int mdfd, } if (dv == moved_disk && dnum != insert_point) break; } + if (pass == 1) + st->ss->write_init_super(st); } st->ss->free_super(st); diff --git a/Kill.c b/Kill.c index 0a2763ea..5b254872 100644 --- a/Kill.c +++ b/Kill.c @@ -34,7 +34,7 @@ #include "md_u.h" #include "md_p.h" -int Kill(char *dev, int force, int quiet) +int Kill(char *dev, int force, int quiet, int noexcl) { /* * Nothing fancy about Kill. It just zeroes out a superblock @@ -44,7 +44,7 @@ int Kill(char *dev, int force, int quiet) int fd, rv = 0; struct supertype *st; - fd = open(dev, O_RDWR|O_EXCL); + fd = open(dev, noexcl ? 
O_RDWR : (O_RDWR|O_EXCL)); if (fd < 0) { if (!quiet) fprintf(stderr, Name ": Couldn't open %s for write - not zeroing\n", diff --git a/Manage.c b/Manage.c index 1fb84689..4202fd95 100644 --- a/Manage.c +++ b/Manage.c @@ -446,11 +446,14 @@ int Manage_subdevs(char *devname, int fd, disc.number =j; disc.state = 0; if (array.not_persistent==0) { + int dfd; if (dv->writemostly) disc.state |= 1 << MD_DISK_WRITEMOSTLY; - tst->ss->add_to_super(tst, &disc); - if (tst->ss->write_init_super(tst, &disc, - dv->devname)) + dfd = open(dv->devname, O_RDWR | O_EXCL); + tst->ss->add_to_super(tst, &disc, dfd, + dv->devname); + /* write_init_super will close 'dfd' */ + if (tst->ss->write_init_super(tst)) return 1; } else if (dv->re_add) { /* this had better be raid1. diff --git a/mdadm.c b/mdadm.c index 67823d50..4d7025e3 100644 --- a/mdadm.c +++ b/mdadm.c @@ -1270,7 +1270,8 @@ int main(int argc, char *argv[]) export, test, homehost); continue; case 'K': /* Zero superblock */ - rv |= Kill(dv->devname, force, quiet); continue; + rv |= Kill(dv->devname, force, quiet,0); + continue; case 'Q': rv |= Query(dv->devname); continue; case 'X': diff --git a/mdadm.h b/mdadm.h index 3583e239..71903768 100644 --- a/mdadm.h +++ b/mdadm.h @@ -358,10 +358,10 @@ extern struct superswitch { int (*init_super)(struct supertype *st, mdu_array_info_t *info, unsigned long long size, char *name, char *homehost, int *uuid); - void (*add_to_super)(struct supertype *st, mdu_disk_info_t *dinfo); + void (*add_to_super)(struct supertype *st, mdu_disk_info_t *dinfo, + int fd, char *devname); int (*store_super)(struct supertype *st, int fd); - int (*write_init_super)(struct supertype *st, mdu_disk_info_t *dinfo, - char *devname); + int (*write_init_super)(struct supertype *st); int (*compare_super)(struct supertype *st, struct supertype *tst); int (*load_super)(struct supertype *st, int fd, char *devname); struct supertype * (*match_metadata_desc)(char *arg); @@ -385,6 +385,7 @@ struct supertype { int minor_version; 
int max_devs; void *sb; + void *info; }; extern struct supertype *super_by_fd(int fd); @@ -477,7 +478,7 @@ extern int Monitor(mddev_dev_t devlist, int period, int daemonise, int scan, int oneshot, int dosyslog, int test, char *pidfile); -extern int Kill(char *dev, int force, int quiet); +extern int Kill(char *dev, int force, int quiet, int noexcl); extern int Wait(char *dev); extern int Incremental(char *devname, int verbose, int runstop, diff --git a/super0.c b/super0.c index a1c97f88..5c2ee879 100644 --- a/super0.c +++ b/super0.c @@ -623,17 +623,35 @@ static int init_super0(struct supertype *st, mdu_array_info_t *info, return 1; } +struct devinfo { + int fd; + char *devname; + mdu_disk_info_t disk; + struct devinfo *next; +}; /* Add a device to the superblock being created */ -static void add_to_super0(struct supertype *st, mdu_disk_info_t *dinfo) +static void add_to_super0(struct supertype *st, mdu_disk_info_t *dinfo, + int fd, char *devname) { mdp_super_t *sb = st->sb; mdp_disk_t *dk = &sb->disks[dinfo->number]; + struct devinfo *di, **dip; dk->number = dinfo->number; dk->major = dinfo->major; dk->minor = dinfo->minor; dk->raid_disk = dinfo->raid_disk; dk->state = dinfo->state; + + dip = (struct devinfo **)&st->info; + while (*dip) + dip = &(*dip)->next; + di = malloc(sizeof(struct devinfo)); + di->fd = fd; + di->devname = devname; + di->disk = *dinfo; + di->next = NULL; + *dip = di; } static int store_super0(struct supertype *st, int fd) @@ -669,32 +687,39 @@ static int store_super0(struct supertype *st, int fd) return 0; } -static int write_init_super0(struct supertype *st, - mdu_disk_info_t *dinfo, char *devname) +#ifndef MDASSEMBLE +static int write_init_super0(struct supertype *st) { mdp_super_t *sb = st->sb; - int fd = open(devname, O_RDWR|O_EXCL); - int rv; + int rv = 0; + struct devinfo *di; - if (fd < 0) { - fprintf(stderr, Name ": Failed to open %s to write superblock\n", devname); - return -1; - } + for (di = st->info ; di && ! 
rv ; di = di->next) { - sb->disks[dinfo->number].state &= ~(1<disk.state == 1) + continue; + Kill(di->devname, 0, 1, 1); + Kill(di->devname, 0, 1, 1); - sb->this_disk = sb->disks[dinfo->number]; - sb->sb_csum = calc_sb0_csum(sb); - rv = store_super0(st, fd); + sb->disks[di->disk.number].state &= ~(1<state & (1<ss->write_bitmap(st, fd); + sb->this_disk = sb->disks[di->disk.number]; + sb->sb_csum = calc_sb0_csum(sb); + rv = store_super0(st, di->fd); - close(fd); - if (rv) - fprintf(stderr, Name ": failed to write superblock to %s\n", devname); + if (rv == 0 && (sb->state & (1<ss->write_bitmap(st, di->fd); + + if (rv) + fprintf(stderr, + Name ": failed to write superblock to %s\n", + di->devname); + close(di->fd); + di->fd = -1; + } return rv; } +#endif static int compare_super0(struct supertype *st, struct supertype *tst) { @@ -812,6 +837,7 @@ static int load_super0(struct supertype *st, int fd, char *devname) st->ss = &super0; st->minor_version = super->minor_version; st->max_devs = MD_SB_DISKS; + st->info = NULL; } /* Now check on the bitmap superblock */ @@ -844,6 +870,7 @@ static struct supertype *match_metadata_desc0(char *arg) if (!st) return st; st->ss = &super0; + st->info = NULL; st->minor_version = 90; st->max_devs = MD_SB_DISKS; st->sb = NULL; @@ -1036,6 +1063,7 @@ struct superswitch super0 = { .detail_super = detail_super0, .brief_detail_super = brief_detail_super0, .export_detail_super = export_detail_super0, + .write_init_super = write_init_super0, #endif .match_home = match_home0, .uuid_from_super = uuid_from_super0, @@ -1044,7 +1072,6 @@ struct superswitch super0 = { .init_super = init_super0, .add_to_super = add_to_super0, .store_super = store_super0, - .write_init_super = write_init_super0, .compare_super = compare_super0, .load_super = load_super0, .match_metadata_desc = match_metadata_desc0, diff --git a/super1.c b/super1.c index c5108997..211be823 100644 --- a/super1.c +++ b/super1.c @@ -767,17 +767,36 @@ static int init_super1(struct supertype 
*st, mdu_array_info_t *info, return 1; } +struct devinfo { + int fd; + char *devname; + mdu_disk_info_t disk; + struct devinfo *next; +}; /* Add a device to the superblock being created */ -static void add_to_super1(struct supertype *st, mdu_disk_info_t *dk) +static void add_to_super1(struct supertype *st, mdu_disk_info_t *dk, + int fd, char *devname) { struct mdp_superblock_1 *sb = st->sb; __u16 *rp = sb->dev_roles + dk->number; + struct devinfo *di, **dip; + if ((dk->state & 6) == 6) /* active, sync */ *rp = __cpu_to_le16(dk->raid_disk); else if ((dk->state & ~2) == 0) /* active or idle -> spare */ *rp = 0xffff; else *rp = 0xfffe; + + dip = (struct devinfo **)&st->info; + while (*dip) + dip = &(*dip)->next; + di = malloc(sizeof(struct devinfo)); + di->fd = fd; + di->devname = devname; + di->disk = *dk; + di->next = NULL; + *dip = di; } static void locate_bitmap1(struct supertype *st, int fd); @@ -866,123 +885,137 @@ static unsigned long choose_bm_space(unsigned long devsize) return 4*2; } -static int write_init_super1(struct supertype *st, - mdu_disk_info_t *dinfo, char *devname) +#ifndef MDASSEMBLE +static int write_init_super1(struct supertype *st) { struct mdp_superblock_1 *sb = st->sb; struct supertype refst; - int fd = open(devname, O_RDWR | O_EXCL); int rfd; - int rv; + int rv = 0; int bm_space; - + struct devinfo *di; unsigned long long dsize, array_size; long long sb_offset; + for (di = st->info; di && ! 
rv ; di = di->next) { + if (di->disk.state == 1) + continue; - if (fd < 0) { - fprintf(stderr, Name ": Failed to open %s to write superblock\n", - devname); - return -1; - } + Kill(di->devname, 0, 1, 1); + Kill(di->devname, 0, 1, 1); - sb->dev_number = __cpu_to_le32(dinfo->number); - if (dinfo->state & (1<devflags |= __cpu_to_le32(WriteMostly1); + if (di->fd < 0) { + fprintf(stderr, + Name": Failed to open %s to write superblock\n", + di->devname); + return -1; + } + sb->dev_number = __cpu_to_le32(di->disk.number); + if (di->disk.state & (1<devflags |= __cpu_to_le32(WriteMostly1); - if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 || - read(rfd, sb->device_uuid, 16) != 16) { - *(__u32*)(sb->device_uuid) = random(); - *(__u32*)(sb->device_uuid+4) = random(); - *(__u32*)(sb->device_uuid+8) = random(); - *(__u32*)(sb->device_uuid+12) = random(); - } - if (rfd >= 0) close(rfd); - sb->events = 0; - - refst =*st; - refst.sb = NULL; - if (load_super1(&refst, fd, NULL)==0) { - struct mdp_superblock_1 *refsb = refst.sb; - - memcpy(sb->device_uuid, refsb->device_uuid, 16); - if (memcmp(sb->set_uuid, refsb->set_uuid, 16)==0) { - /* same array, so preserve events and dev_number */ - sb->events = refsb->events; - /* bugs in 2.6.17 and earlier mean the dev_number - * chosen in Manage must be preserved - */ - if (get_linux_version() >= 2006018) - sb->dev_number = refsb->dev_number; + if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 || + read(rfd, sb->device_uuid, 16) != 16) { + *(__u32*)(sb->device_uuid) = random(); + *(__u32*)(sb->device_uuid+4) = random(); + *(__u32*)(sb->device_uuid+8) = random(); + *(__u32*)(sb->device_uuid+12) = random(); + } + if (rfd >= 0) close(rfd); + sb->events = 0; + + refst =*st; + refst.sb = NULL; + if (load_super1(&refst, di->fd, NULL)==0) { + struct mdp_superblock_1 *refsb = refst.sb; + + memcpy(sb->device_uuid, refsb->device_uuid, 16); + if (memcmp(sb->set_uuid, refsb->set_uuid, 16)==0) { + /* same array, so preserve events and + * dev_number */ + 
sb->events = refsb->events; + /* bugs in 2.6.17 and earlier mean the + * dev_number chosen in Manage must be preserved + */ + if (get_linux_version() >= 2006018) + sb->dev_number = refsb->dev_number; + } + free(refsb); } - free(refsb); - } - if (!get_dev_size(fd, NULL, &dsize)) - return 1; - dsize >>= 9; + if (!get_dev_size(di->fd, NULL, &dsize)) + return 1; + dsize >>= 9; - if (dsize < 24) { - close(fd); - return 2; - } + if (dsize < 24) { + close(di->fd); + return 2; + } - /* - * Calculate the position of the superblock. - * It is always aligned to a 4K boundary and - * depending on minor_version, it can be: - * 0: At least 8K, but less than 12K, from end of device - * 1: At start of device - * 2: 4K from start of device. - * Depending on the array size, we might leave extra space - * for a bitmap. - */ - array_size = __le64_to_cpu(sb->size); - /* work out how much space we left for a bitmap */ - bm_space = choose_bm_space(array_size); - - switch(st->minor_version) { - case 0: - sb_offset = dsize; - sb_offset -= 8*2; - sb_offset &= ~(4*2-1); - sb->super_offset = __cpu_to_le64(sb_offset); - sb->data_offset = __cpu_to_le64(0); + /* + * Calculate the position of the superblock. + * It is always aligned to a 4K boundary and + * depending on minor_version, it can be: + * 0: At least 8K, but less than 12K, from end of device + * 1: At start of device + * 2: 4K from start of device. + * Depending on the array size, we might leave extra space + * for a bitmap. 
+ */ + array_size = __le64_to_cpu(sb->size); + /* work out how much space we left for a bitmap */ + bm_space = choose_bm_space(array_size); + + switch(st->minor_version) { + case 0: + sb_offset = dsize; + sb_offset -= 8*2; + sb_offset &= ~(4*2-1); + sb->super_offset = __cpu_to_le64(sb_offset); + sb->data_offset = __cpu_to_le64(0); if (sb_offset - bm_space < array_size) bm_space = sb_offset - array_size; - sb->data_size = __cpu_to_le64(sb_offset - bm_space); - break; - case 1: - sb->super_offset = __cpu_to_le64(0); - if (4*2 + bm_space + __le64_to_cpu(sb->size) > dsize) - bm_space = dsize - __le64_to_cpu(sb->size) - 4*2; - sb->data_offset = __cpu_to_le64(bm_space + 4*2); - sb->data_size = __cpu_to_le64(dsize - bm_space - 4*2); - break; - case 2: - sb_offset = 4*2; - sb->super_offset = __cpu_to_le64(4*2); - if (4*2 + 4*2 + bm_space + __le64_to_cpu(sb->size) > dsize) - bm_space = dsize - __le64_to_cpu(sb->size) - 4*2 - 4*2; - sb->data_offset = __cpu_to_le64(4*2 + 4*2 + bm_space); - sb->data_size = __cpu_to_le64(dsize - 4*2 - 4*2 - bm_space ); - break; - default: - return -EINVAL; - } + sb->data_size = __cpu_to_le64(sb_offset - bm_space); + break; + case 1: + sb->super_offset = __cpu_to_le64(0); + if (4*2 + bm_space + __le64_to_cpu(sb->size) > dsize) + bm_space = dsize - __le64_to_cpu(sb->size) -4*2; + sb->data_offset = __cpu_to_le64(bm_space + 4*2); + sb->data_size = __cpu_to_le64(dsize - bm_space - 4*2); + break; + case 2: + sb_offset = 4*2; + sb->super_offset = __cpu_to_le64(4*2); + if (4*2 + 4*2 + bm_space + __le64_to_cpu(sb->size) + > dsize) + bm_space = dsize - __le64_to_cpu(sb->size) + - 4*2 - 4*2; + sb->data_offset = __cpu_to_le64(4*2 + 4*2 + bm_space); + sb->data_size = __cpu_to_le64(dsize - 4*2 - 4*2 + - bm_space ); + break; + default: + return -EINVAL; + } - sb->sb_csum = calc_sb_1_csum(sb); - rv = store_super1(st, fd); - if (rv) - fprintf(stderr, Name ": failed to write superblock to %s\n", devname); + sb->sb_csum = calc_sb_1_csum(sb); + rv = 
store_super1(st, di->fd); + if (rv) + fprintf(stderr, + Name ": failed to write superblock to %s\n", + di->devname); - if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1)) - rv = st->ss->write_bitmap(st, fd); - close(fd); + if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1)) + rv = st->ss->write_bitmap(st, di->fd); + close(di->fd); + di->fd = -1; + } return rv; } +#endif static int compare_super1(struct supertype *st, struct supertype *tst) { @@ -1453,6 +1486,7 @@ struct superswitch super1 = { .detail_super = detail_super1, .brief_detail_super = brief_detail_super1, .export_detail_super = export_detail_super1, + .write_init_super = write_init_super1, #endif .match_home = match_home1, .uuid_from_super = uuid_from_super1, @@ -1461,7 +1495,6 @@ struct superswitch super1 = { .init_super = init_super1, .add_to_super = add_to_super1, .store_super = store_super1, - .write_init_super = write_init_super1, .compare_super = compare_super1, .load_super = load_super1, .match_metadata_desc = match_metadata_desc1, -- 2.39.2