Change write_init_super to be called only once.
author Neil Brown <neilb@suse.de>
Thu, 15 May 2008 06:48:12 +0000 (16:48 +1000)
committer Neil Brown <neilb@suse.de>
Thu, 15 May 2008 06:48:12 +0000 (16:48 +1000)
The current model for creating arrays involves writing
a superblock to each device in the array.
With containers (as with DDF), that model doesn't work.
Every device in the container may need to be updated
for an array made from just some of the devices in a container.

So instead of calling write_init_super for each device,
we call it once for the array and have it iterate over
all the devices in the array.

To help with this, ->add_to_super is now passed an 'fd' and name for
the device.  These get saved for use by write_init_super.  So
add_to_super takes ownership of the fd, and write_init_super will
close it.
This information is stored in the new 'info' field of supertype.

As part of this, write_init_super now removes any old traces of raid
metadata rather than doing this in common code.

Create.c
Kill.c
Manage.c
mdadm.c
mdadm.h
super0.c
super1.c

diff --git a/Create.c b/Create.c
index 629d359..19793fa 100644 (file)
--- a/Create.c
+++ b/Create.c
@@ -547,33 +547,28 @@ int Create(struct supertype *st, char *mddev, int mdfd,
                                info.disk.state |= (1<<MD_DISK_WRITEMOSTLY);
 
                        if (dnum == insert_point ||
-                           strcasecmp(dv->devname, "missing")==0) {
-                               info.disk.major = 0;
-                               info.disk.minor = 0;
-                               info.disk.state = (1<<MD_DISK_FAULTY);
-                       } else {
-                               fd = open(dv->devname, O_RDONLY|O_EXCL, 0);
-                               if (fd < 0) {
-                                       fprintf(stderr, Name ": failed to open %s after earlier success - aborting\n",
-                                               dv->devname);
-                                       return 1;
-                               }
-                               fstat(fd, &stb);
-                               info.disk.major = major(stb.st_rdev);
-                               info.disk.minor = minor(stb.st_rdev);
-                               remove_partitions(fd);
-                               close(fd);
+                           strcasecmp(dv->devname, "missing")==0)
+                               continue;
+
+                       fd = open(dv->devname, O_RDWR|O_EXCL, 0);
+                       if (fd < 0) {
+                               fprintf(stderr, Name ": failed to open %s "
+                                       "after earlier success - aborting\n",
+                                       dv->devname);
+                               return 1;
                        }
+                       fstat(fd, &stb);
+                       info.disk.major = major(stb.st_rdev);
+                       info.disk.minor = minor(stb.st_rdev);
+
                        switch(pass){
                        case 1:
-                               st->ss->add_to_super(st, &info.disk);
+                               remove_partitions(fd);
+                               st->ss->add_to_super(st, &info.disk,
+                                                    fd, dv->devname);
                                break;
                        case 2:
-                               if (info.disk.state == 1) break;
-                               Kill(dv->devname, 0, 1); /* Just be sure it is clean */
-                               Kill(dv->devname, 0, 1); /* and again, there could be two superblocks */
-                               st->ss->write_init_super(st, &info.disk,
-                                                        dv->devname);
+                               close(fd);
 
                                if (ioctl(mdfd, ADD_NEW_DISK, &info.disk)) {
                                        fprintf(stderr, Name ": ADD_NEW_DISK for %s failed: %s\n",
@@ -586,6 +581,8 @@ int Create(struct supertype *st, char *mddev, int mdfd,
                        }
                        if (dv == moved_disk && dnum != insert_point) break;
                }
+               if (pass == 1)
+                       st->ss->write_init_super(st);
        }
        st->ss->free_super(st);
 
diff --git a/Kill.c b/Kill.c
index 0a2763e..5b25487 100644 (file)
--- a/Kill.c
+++ b/Kill.c
@@ -34,7 +34,7 @@
 #include       "md_u.h"
 #include       "md_p.h"
 
-int Kill(char *dev, int force, int quiet)
+int Kill(char *dev, int force, int quiet, int noexcl)
 {
        /*
         * Nothing fancy about Kill.  It just zeroes out a superblock
@@ -44,7 +44,7 @@ int Kill(char *dev, int force, int quiet)
        int fd, rv = 0;
        struct supertype *st;
 
-       fd = open(dev, O_RDWR|O_EXCL);
+       fd = open(dev, noexcl ? O_RDWR : (O_RDWR|O_EXCL));
        if (fd < 0) {
                if (!quiet)
                        fprintf(stderr, Name ": Couldn't open %s for write - not zeroing\n",
diff --git a/Manage.c b/Manage.c
index 1fb8468..4202fd9 100644 (file)
--- a/Manage.c
+++ b/Manage.c
@@ -446,11 +446,14 @@ int Manage_subdevs(char *devname, int fd,
                        disc.number =j;
                        disc.state = 0;
                        if (array.not_persistent==0) {
+                               int dfd;
                                if (dv->writemostly)
                                        disc.state |= 1 << MD_DISK_WRITEMOSTLY;
-                               tst->ss->add_to_super(tst, &disc);
-                               if (tst->ss->write_init_super(tst, &disc,
-                                                             dv->devname))
+                               dfd = open(dv->devname, O_RDWR | O_EXCL);
+                               tst->ss->add_to_super(tst, &disc, dfd,
+                                                     dv->devname);
+                               /* write_init_super will close 'dfd' */
+                               if (tst->ss->write_init_super(tst))
                                        return 1;
                        } else if (dv->re_add) {
                                /*  this had better be raid1.
diff --git a/mdadm.c b/mdadm.c
index 67823d5..4d7025e 100644 (file)
--- a/mdadm.c
+++ b/mdadm.c
@@ -1270,7 +1270,8 @@ int main(int argc, char *argv[])
                                                     export, test, homehost);
                                        continue;
                                case 'K': /* Zero superblock */
-                                       rv |= Kill(dv->devname, force, quiet); continue;
+                                       rv |= Kill(dv->devname, force, quiet,0);
+                                       continue;
                                case 'Q':
                                        rv |= Query(dv->devname); continue;
                                case 'X':
diff --git a/mdadm.h b/mdadm.h
index 3583e23..7190376 100644 (file)
--- a/mdadm.h
+++ b/mdadm.h
@@ -358,10 +358,10 @@ extern struct superswitch {
        int (*init_super)(struct supertype *st, mdu_array_info_t *info,
                          unsigned long long size, char *name,
                          char *homehost, int *uuid);
-       void (*add_to_super)(struct supertype *st, mdu_disk_info_t *dinfo);
+       void (*add_to_super)(struct supertype *st, mdu_disk_info_t *dinfo,
+                            int fd, char *devname);
        int (*store_super)(struct supertype *st, int fd);
-       int (*write_init_super)(struct supertype *st, mdu_disk_info_t *dinfo,
-                               char *devname);
+       int (*write_init_super)(struct supertype *st);
        int (*compare_super)(struct supertype *st, struct supertype *tst);
        int (*load_super)(struct supertype *st, int fd, char *devname);
        struct supertype * (*match_metadata_desc)(char *arg);
@@ -385,6 +385,7 @@ struct supertype {
        int minor_version;
        int max_devs;
        void *sb;
+       void *info;
 };
 
 extern struct supertype *super_by_fd(int fd);
@@ -477,7 +478,7 @@ extern int Monitor(mddev_dev_t devlist,
                   int period, int daemonise, int scan, int oneshot,
                   int dosyslog, int test, char *pidfile);
 
-extern int Kill(char *dev, int force, int quiet);
+extern int Kill(char *dev, int force, int quiet, int noexcl);
 extern int Wait(char *dev);
 
 extern int Incremental(char *devname, int verbose, int runstop,
diff --git a/super0.c b/super0.c
index a1c97f8..5c2ee87 100644 (file)
--- a/super0.c
+++ b/super0.c
@@ -623,17 +623,35 @@ static int init_super0(struct supertype *st, mdu_array_info_t *info,
        return 1;
 }
 
+struct devinfo {
+       int fd;
+       char *devname;
+       mdu_disk_info_t disk;
+       struct devinfo *next;
+};
 /* Add a device to the superblock being created */
-static void add_to_super0(struct supertype *st, mdu_disk_info_t *dinfo)
+static void add_to_super0(struct supertype *st, mdu_disk_info_t *dinfo,
+                         int fd, char *devname)
 {
        mdp_super_t *sb = st->sb;
        mdp_disk_t *dk = &sb->disks[dinfo->number];
+       struct devinfo *di, **dip;
 
        dk->number = dinfo->number;
        dk->major = dinfo->major;
        dk->minor = dinfo->minor;
        dk->raid_disk = dinfo->raid_disk;
        dk->state = dinfo->state;
+
+       dip = (struct devinfo **)&st->info;
+       while (*dip)
+               dip = &(*dip)->next;
+       di = malloc(sizeof(struct devinfo));
+       di->fd = fd;
+       di->devname = devname;
+       di->disk = *dinfo;
+       di->next = NULL;
+       *dip = di;
 }
 
 static int store_super0(struct supertype *st, int fd)
@@ -669,32 +687,39 @@ static int store_super0(struct supertype *st, int fd)
        return 0;
 }
 
-static int write_init_super0(struct supertype *st,
-                            mdu_disk_info_t *dinfo, char *devname)
+#ifndef MDASSEMBLE
+static int write_init_super0(struct supertype *st)
 {
        mdp_super_t *sb = st->sb;
-       int fd = open(devname, O_RDWR|O_EXCL);
-       int rv;
+       int rv = 0;
+       struct devinfo *di;
 
-       if (fd < 0) {
-               fprintf(stderr, Name ": Failed to open %s to write superblock\n", devname);
-               return -1;
-       }
+       for (di = st->info ; di && ! rv ; di = di->next) {
 
-       sb->disks[dinfo->number].state &= ~(1<<MD_DISK_FAULTY);
+               if (di->disk.state == 1)
+                       continue;
+               Kill(di->devname, 0, 1, 1);
+               Kill(di->devname, 0, 1, 1);
 
-       sb->this_disk = sb->disks[dinfo->number];
-       sb->sb_csum = calc_sb0_csum(sb);
-       rv = store_super0(st, fd);
+               sb->disks[di->disk.number].state &= ~(1<<MD_DISK_FAULTY);
 
-       if (rv == 0 && (sb->state & (1<<MD_SB_BITMAP_PRESENT)))
-               rv = st->ss->write_bitmap(st, fd);
+               sb->this_disk = sb->disks[di->disk.number];
+               sb->sb_csum = calc_sb0_csum(sb);
+               rv = store_super0(st, di->fd);
 
-       close(fd);
-       if (rv)
-               fprintf(stderr, Name ": failed to write superblock to %s\n", devname);
+               if (rv == 0 && (sb->state & (1<<MD_SB_BITMAP_PRESENT)))
+                       rv = st->ss->write_bitmap(st, di->fd);
+
+               if (rv)
+                       fprintf(stderr,
+                               Name ": failed to write superblock to %s\n",
+                               di->devname);
+               close(di->fd);
+               di->fd = -1;
+       }
        return rv;
 }
+#endif
 
 static int compare_super0(struct supertype *st, struct supertype *tst)
 {
@@ -812,6 +837,7 @@ static int load_super0(struct supertype *st, int fd, char *devname)
                st->ss = &super0;
                st->minor_version = super->minor_version;
                st->max_devs = MD_SB_DISKS;
+               st->info = NULL;
        }
 
        /* Now check on the bitmap superblock */
@@ -844,6 +870,7 @@ static struct supertype *match_metadata_desc0(char *arg)
        if (!st) return st;
 
        st->ss = &super0;
+       st->info = NULL;
        st->minor_version = 90;
        st->max_devs = MD_SB_DISKS;
        st->sb = NULL;
@@ -1036,6 +1063,7 @@ struct superswitch super0 = {
        .detail_super = detail_super0,
        .brief_detail_super = brief_detail_super0,
        .export_detail_super = export_detail_super0,
+       .write_init_super = write_init_super0,
 #endif
        .match_home = match_home0,
        .uuid_from_super = uuid_from_super0,
@@ -1044,7 +1072,6 @@ struct superswitch super0 = {
        .init_super = init_super0,
        .add_to_super = add_to_super0,
        .store_super = store_super0,
-       .write_init_super = write_init_super0,
        .compare_super = compare_super0,
        .load_super = load_super0,
        .match_metadata_desc = match_metadata_desc0,
diff --git a/super1.c b/super1.c
index c510899..211be82 100644 (file)
--- a/super1.c
+++ b/super1.c
@@ -767,17 +767,36 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
        return 1;
 }
 
+struct devinfo {
+       int fd;
+       char *devname;
+       mdu_disk_info_t disk;
+       struct devinfo *next;
+};
 /* Add a device to the superblock being created */
-static void add_to_super1(struct supertype *st, mdu_disk_info_t *dk)
+static void add_to_super1(struct supertype *st, mdu_disk_info_t *dk,
+                         int fd, char *devname)
 {
        struct mdp_superblock_1 *sb = st->sb;
        __u16 *rp = sb->dev_roles + dk->number;
+       struct devinfo *di, **dip;
+
        if ((dk->state & 6) == 6) /* active, sync */
                *rp = __cpu_to_le16(dk->raid_disk);
        else if ((dk->state & ~2) == 0) /* active or idle -> spare */
                *rp = 0xffff;
        else
                *rp = 0xfffe;
+
+       dip = (struct devinfo **)&st->info;
+       while (*dip)
+               dip = &(*dip)->next;
+       di = malloc(sizeof(struct devinfo));
+       di->fd = fd;
+       di->devname = devname;
+       di->disk = *dk;
+       di->next = NULL;
+       *dip = di;
 }
 
 static void locate_bitmap1(struct supertype *st, int fd);
@@ -866,123 +885,137 @@ static unsigned long choose_bm_space(unsigned long devsize)
        return 4*2;
 }
 
-static int write_init_super1(struct supertype *st,
-                            mdu_disk_info_t *dinfo, char *devname)
+#ifndef MDASSEMBLE
+static int write_init_super1(struct supertype *st)
 {
        struct mdp_superblock_1 *sb = st->sb;
        struct supertype refst;
-       int fd = open(devname, O_RDWR | O_EXCL);
        int rfd;
-       int rv;
+       int rv = 0;
        int bm_space;
-
+       struct devinfo *di;
        unsigned long long dsize, array_size;
        long long sb_offset;
 
+       for (di = st->info; di && ! rv ; di = di->next) {
+               if (di->disk.state == 1)
+                       continue;
 
-       if (fd < 0) {
-               fprintf(stderr, Name ": Failed to open %s to write superblock\n",
-                       devname);
-               return -1;
-       }
+               Kill(di->devname, 0, 1, 1);
+               Kill(di->devname, 0, 1, 1);
 
-       sb->dev_number = __cpu_to_le32(dinfo->number);
-       if (dinfo->state & (1<<MD_DISK_WRITEMOSTLY))
-               sb->devflags |= __cpu_to_le32(WriteMostly1);
+               if (di->fd < 0) {
+                       fprintf(stderr,
+                               Name": Failed to open %s to write superblock\n",
+                               di->devname);
+                       return -1;
+               }
+               sb->dev_number = __cpu_to_le32(di->disk.number);
+               if (di->disk.state & (1<<MD_DISK_WRITEMOSTLY))
+                       sb->devflags |= __cpu_to_le32(WriteMostly1);
 
-       if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
-           read(rfd, sb->device_uuid, 16) != 16) {
-               *(__u32*)(sb->device_uuid) = random();
-               *(__u32*)(sb->device_uuid+4) = random();
-               *(__u32*)(sb->device_uuid+8) = random();
-               *(__u32*)(sb->device_uuid+12) = random();
-       }
-       if (rfd >= 0) close(rfd);
-       sb->events = 0;
-
-       refst =*st;
-       refst.sb = NULL;
-       if (load_super1(&refst, fd, NULL)==0) {
-               struct mdp_superblock_1 *refsb = refst.sb;
-
-               memcpy(sb->device_uuid, refsb->device_uuid, 16);
-               if (memcmp(sb->set_uuid, refsb->set_uuid, 16)==0) {
-                       /* same array, so preserve events and dev_number */
-                       sb->events = refsb->events;
-                       /* bugs in 2.6.17 and earlier mean the dev_number
-                        * chosen in Manage must be preserved
-                        */
-                       if (get_linux_version() >= 2006018)
-                               sb->dev_number = refsb->dev_number;
+               if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
+                   read(rfd, sb->device_uuid, 16) != 16) {
+                       *(__u32*)(sb->device_uuid) = random();
+                       *(__u32*)(sb->device_uuid+4) = random();
+                       *(__u32*)(sb->device_uuid+8) = random();
+                       *(__u32*)(sb->device_uuid+12) = random();
+               }
+               if (rfd >= 0) close(rfd);
+               sb->events = 0;
+
+               refst =*st;
+               refst.sb = NULL;
+               if (load_super1(&refst, di->fd, NULL)==0) {
+                       struct mdp_superblock_1 *refsb = refst.sb;
+
+                       memcpy(sb->device_uuid, refsb->device_uuid, 16);
+                       if (memcmp(sb->set_uuid, refsb->set_uuid, 16)==0) {
+                               /* same array, so preserve events and
+                                * dev_number */
+                               sb->events = refsb->events;
+                               /* bugs in 2.6.17 and earlier mean the
+                                * dev_number chosen in Manage must be preserved
+                                */
+                               if (get_linux_version() >= 2006018)
+                                       sb->dev_number = refsb->dev_number;
+                       }
+                       free(refsb);
                }
-               free(refsb);
-       }
 
-       if (!get_dev_size(fd, NULL, &dsize))
-               return 1;
-       dsize >>= 9;
+               if (!get_dev_size(di->fd, NULL, &dsize))
+                       return 1;
+               dsize >>= 9;
 
-       if (dsize < 24) {
-               close(fd);
-               return 2;
-       }
+               if (dsize < 24) {
+                       close(di->fd);
+                       return 2;
+               }
 
 
-       /*
-        * Calculate the position of the superblock.
-        * It is always aligned to a 4K boundary and
-        * depending on minor_version, it can be:
-        * 0: At least 8K, but less than 12K, from end of device
-        * 1: At start of device
-        * 2: 4K from start of device.
-        * Depending on the array size, we might leave extra space
-        * for a bitmap.
-        */
-       array_size = __le64_to_cpu(sb->size);
-       /* work out how much space we left for a bitmap */
-       bm_space = choose_bm_space(array_size);
-
-       switch(st->minor_version) {
-       case 0:
-               sb_offset = dsize;
-               sb_offset -= 8*2;
-               sb_offset &= ~(4*2-1);
-               sb->super_offset = __cpu_to_le64(sb_offset);
-               sb->data_offset = __cpu_to_le64(0);
+               /*
+                * Calculate the position of the superblock.
+                * It is always aligned to a 4K boundary and
+                * depending on minor_version, it can be:
+                * 0: At least 8K, but less than 12K, from end of device
+                * 1: At start of device
+                * 2: 4K from start of device.
+                * Depending on the array size, we might leave extra space
+                * for a bitmap.
+                */
+               array_size = __le64_to_cpu(sb->size);
+               /* work out how much space we left for a bitmap */
+               bm_space = choose_bm_space(array_size);
+
+               switch(st->minor_version) {
+               case 0:
+                       sb_offset = dsize;
+                       sb_offset -= 8*2;
+                       sb_offset &= ~(4*2-1);
+                       sb->super_offset = __cpu_to_le64(sb_offset);
+                       sb->data_offset = __cpu_to_le64(0);
                if (sb_offset - bm_space < array_size)
                        bm_space = sb_offset - array_size;
-               sb->data_size = __cpu_to_le64(sb_offset - bm_space);
-               break;
-       case 1:
-               sb->super_offset = __cpu_to_le64(0);
-               if (4*2 + bm_space + __le64_to_cpu(sb->size) > dsize)
-                       bm_space = dsize - __le64_to_cpu(sb->size) - 4*2;
-               sb->data_offset = __cpu_to_le64(bm_space + 4*2);
-               sb->data_size = __cpu_to_le64(dsize - bm_space - 4*2);
-               break;
-       case 2:
-               sb_offset = 4*2;
-               sb->super_offset = __cpu_to_le64(4*2);
-               if (4*2 + 4*2 + bm_space + __le64_to_cpu(sb->size) > dsize)
-                       bm_space = dsize - __le64_to_cpu(sb->size) - 4*2 - 4*2;
-               sb->data_offset = __cpu_to_le64(4*2 + 4*2 + bm_space);
-               sb->data_size = __cpu_to_le64(dsize - 4*2 - 4*2 - bm_space );
-               break;
-       default:
-               return -EINVAL;
-       }
+                       sb->data_size = __cpu_to_le64(sb_offset - bm_space);
+                       break;
+               case 1:
+                       sb->super_offset = __cpu_to_le64(0);
+                       if (4*2 + bm_space + __le64_to_cpu(sb->size) > dsize)
+                               bm_space = dsize - __le64_to_cpu(sb->size) -4*2;
+                       sb->data_offset = __cpu_to_le64(bm_space + 4*2);
+                       sb->data_size = __cpu_to_le64(dsize - bm_space - 4*2);
+                       break;
+               case 2:
+                       sb_offset = 4*2;
+                       sb->super_offset = __cpu_to_le64(4*2);
+                       if (4*2 + 4*2 + bm_space + __le64_to_cpu(sb->size)
+                           > dsize)
+                               bm_space = dsize - __le64_to_cpu(sb->size)
+                                       - 4*2 - 4*2;
+                       sb->data_offset = __cpu_to_le64(4*2 + 4*2 + bm_space);
+                       sb->data_size = __cpu_to_le64(dsize - 4*2 - 4*2
+                                                     - bm_space );
+                       break;
+               default:
+                       return -EINVAL;
+               }
 
 
-       sb->sb_csum = calc_sb_1_csum(sb);
-       rv = store_super1(st, fd);
-       if (rv)
-               fprintf(stderr, Name ": failed to write superblock to %s\n", devname);
+               sb->sb_csum = calc_sb_1_csum(sb);
+               rv = store_super1(st, di->fd);
+               if (rv)
+                       fprintf(stderr,
+                               Name ": failed to write superblock to %s\n",
+                               di->devname);
 
-       if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1))
-               rv = st->ss->write_bitmap(st, fd);
-       close(fd);
+               if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1))
+                       rv = st->ss->write_bitmap(st, di->fd);
+               close(di->fd);
+               di->fd = -1;
+       }
        return rv;
 }
+#endif
 
 static int compare_super1(struct supertype *st, struct supertype *tst)
 {
@@ -1453,6 +1486,7 @@ struct superswitch super1 = {
        .detail_super = detail_super1,
        .brief_detail_super = brief_detail_super1,
        .export_detail_super = export_detail_super1,
+       .write_init_super = write_init_super1,
 #endif
        .match_home = match_home1,
        .uuid_from_super = uuid_from_super1,
@@ -1461,7 +1495,6 @@ struct superswitch super1 = {
        .init_super = init_super1,
        .add_to_super = add_to_super1,
        .store_super = store_super1,
-       .write_init_super = write_init_super1,
        .compare_super = compare_super1,
        .load_super = load_super1,
        .match_metadata_desc = match_metadata_desc1,