git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Grow.c
imsm: set imsm spare uuid to 0
[thirdparty/mdadm.git] / Grow.c
diff --git a/Grow.c b/Grow.c
index 3923a90fded10dd1dd050101b7f7d40bc41b08b2..21e14373da579343b93ba93d906a8dcea0f7e69b 100644 (file)
--- a/Grow.c
+++ b/Grow.c
@@ -51,33 +51,41 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
        int nfd, fd2;
        int d, nd;
        struct supertype *st = NULL;
-
+       char *subarray = NULL;
 
        if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
                fprintf(stderr, Name ": cannot get array info for %s\n", devname);
                return 1;
        }
 
-       st = super_by_fd(fd);
+       if (info.array.level != -1) {
+               fprintf(stderr, Name ": can only add devices to linear arrays\n");
+               return 1;
+       }
+
+       st = super_by_fd(fd, &subarray);
        if (!st) {
                fprintf(stderr, Name ": cannot handle arrays with superblock version %d\n", info.array.major_version);
                return 1;
        }
 
-       if (info.array.level != -1) {
-               fprintf(stderr, Name ": can only add devices to linear arrays\n");
-               return 1;
+       if (subarray) {
+               fprintf(stderr, Name ": Cannot grow linear sub-arrays yet\n");
+               free(subarray);
+               free(st);
        }
 
        nfd = open(newdev, O_RDWR|O_EXCL|O_DIRECT);
        if (nfd < 0) {
                fprintf(stderr, Name ": cannot open %s\n", newdev);
+               free(st);
                return 1;
        }
        fstat(nfd, &stb);
        if ((stb.st_mode & S_IFMT) != S_IFBLK) {
                fprintf(stderr, Name ": %s is not a block device!\n", newdev);
                close(nfd);
+               free(st);
                return 1;
        }
        /* now check out all the devices and make sure we can read the superblock */
@@ -85,28 +93,37 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
                mdu_disk_info_t disk;
                char *dv;
 
+               st->ss->free_super(st);
+
                disk.number = d;
                if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
                        fprintf(stderr, Name ": cannot get device detail for device %d\n",
                                d);
+                       close(nfd);
+                       free(st);
                        return 1;
                }
                dv = map_dev(disk.major, disk.minor, 1);
                if (!dv) {
                        fprintf(stderr, Name ": cannot find device file for device %d\n",
                                d);
+                       close(nfd);
+                       free(st);
                        return 1;
                }
                fd2 = dev_open(dv, O_RDWR);
                if (!fd2) {
                        fprintf(stderr, Name ": cannot open device file %s\n", dv);
+                       close(nfd);
+                       free(st);
                        return 1;
                }
-               st->ss->free_super(st);
 
                if (st->ss->load_super(st, fd2, NULL)) {
                        fprintf(stderr, Name ": cannot find super block on %s\n", dv);
+                       close(nfd);
                        close(fd2);
+                       free(st);
                        return 1;
                }
                close(fd2);
@@ -204,16 +221,17 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
        mdu_bitmap_file_t bmf;
        mdu_array_info_t array;
        struct supertype *st;
+       char *subarray = NULL;
        int major = BITMAP_MAJOR_HI;
        int vers = md_get_version(fd);
        unsigned long long bitmapsize, array_size;
 
        if (vers < 9003) {
                major = BITMAP_MAJOR_HOSTENDIAN;
-#ifdef __BIG_ENDIAN
-               fprintf(stderr, Name ": Warning - bitmaps created on this kernel are not portable\n"
-                       "  between different architectured.  Consider upgrading the Linux kernel.\n");
-#endif
+               fprintf(stderr, Name ": Warning - bitmaps created on this kernel"
+                       " are not portable\n"
+                       "  between different architectures.  Consider upgrading"
+                       " the Linux kernel.\n");
        }
 
        if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) {
@@ -253,6 +271,11 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
                        devname);
                return 1;
        }
+
+       if (strcmp(file, "none") == 0) {
+               fprintf(stderr, Name ": no bitmap found on %s\n", devname);
+               return 1;
+       }
        if (array.level <= 0) {
                fprintf(stderr, Name ": Bitmaps not meaningful with level %s\n",
                        map_num(pers, array.level)?:"of this array");
@@ -277,16 +300,19 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
                bitmapsize = bitmapsize * array.raid_disks / ncopies;
        }
 
-       st = super_by_fd(fd);
+       st = super_by_fd(fd, &subarray);
        if (!st) {
                fprintf(stderr, Name ": Cannot understand version %d.%d\n",
                        array.major_version, array.minor_version);
                return 1;
        }
-       if (strcmp(file, "none") == 0) {
-               fprintf(stderr, Name ": no bitmap found on %s\n", devname);
+       if (subarray) {
+               fprintf(stderr, Name ": Cannot add bitmaps to sub-arrays yet\n");
+               free(subarray);
+               free(st);
                return 1;
-       } else if (strcmp(file, "internal") == 0) {
+       }
+       if (strcmp(file, "internal") == 0) {
                int d;
                if (st->ss->add_internal_bitmap == NULL) {
                        fprintf(stderr, Name ": Internal bitmaps not supported "
@@ -327,6 +353,10 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
                }
                array.state |= (1<<MD_SB_BITMAP_PRESENT);
                if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
+                       if (errno == EBUSY)
+                               fprintf(stderr, Name
+                                       ": Cannot add bitmap while array is"
+                                       " resyncing or reshaping etc.\n");
                        fprintf(stderr, Name ": failed to set internal bitmap.\n");
                        return 1;
                }
@@ -373,8 +403,13 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
                        return 1;
                }
                if (ioctl(fd, SET_BITMAP_FILE, bitmap_fd) < 0) {
+                       int err = errno;
+                       if (errno == EBUSY)
+                               fprintf(stderr, Name
+                                       ": Cannot add bitmap while array is"
+                                       " resyncing or reshaping etc.\n");
                        fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
-                               devname, strerror(errno));
+                               devname, strerror(err));
                        return 1;
                }
        }
@@ -409,7 +444,7 @@ static struct mdp_backup_super {
        __u8 pad[512-68-32];
 } __attribute__((aligned(512))) bsb, bsb2;
 
-int bsb_csum(char *buf, int len)
+static __u32 bsb_csum(char *buf, int len)
 {
        int i;
        int csum = 0;
@@ -432,53 +467,618 @@ static int child_same_size(int afd, struct mdinfo *sra, unsigned long blocks,
                           int disks, int chunk, int level, int layout, int data,
                           int dests, int *destfd, unsigned long long *destoffsets);
 
-int freeze_array(struct mdinfo *sra)
+static int check_idle(struct supertype *st)
 {
-       /* Try to freeze resync on this array.
+       /* Check that all member arrays for this container, or the
+        * container of this array, are idle
+        */
+       int container_dev = (st->container_dev != NoMdDev
+                            ? st->container_dev : st->devnum);
+       char container[40];
+       struct mdstat_ent *ent, *e;
+       int is_idle = 1;
+       
+       fmt_devname(container, container_dev);
+       ent = mdstat_read(0, 0);
+       for (e = ent ; e; e = e->next) {
+               if (!is_container_member(e, container))
+                       continue;
+               if (e->percent >= 0) {
+                       is_idle = 0;
+                       break;
+               }
+       }
+       free_mdstat(ent);
+       return is_idle;
+}
+
+/* Block the monitor of the container that 'st' describes, or that the
+ * array described by 'st' lives in.
+ * Returns -1 when check_idle() reports activity, -2 when block_monitor()
+ * fails, and 1 once the monitor has been blocked.
+ */
+static int freeze_container(struct supertype *st)
+{
+       char name[40];
+       int devnum;
+
+       if (!check_idle(st))
+               return -1;
+
+       /* use the recorded container device when there is one,
+        * otherwise the device itself
+        */
+       if (st->container_dev != NoMdDev)
+               devnum = st->container_dev;
+       else
+               devnum = st->devnum;
+       fmt_devname(name, devnum);
+
+       if (block_monitor(name, 1) != 0) {
+               fprintf(stderr, Name ": failed to freeze container\n");
+               return -2;
+       }
+       return 1;
+}
+
+/* Counterpart of freeze_container(): unblock the monitor of the container
+ * device recorded in 'st' (or of 'st' itself when no container is recorded).
+ */
+static void unfreeze_container(struct supertype *st)
+{
+       char name[40];
+       int devnum = st->devnum;
+
+       if (st->container_dev != NoMdDev)
+               devnum = st->container_dev;
+
+       fmt_devname(name, devnum);
+       unblock_monitor(name, 1);
+}
+
+static int freeze(struct supertype *st)
+{
+       /* Try to freeze resync/rebuild on this array/container.
         * Return -1 if the array is busy,
+        * return -2 container cannot be frozen,
         * return 0 if this kernel doesn't support 'frozen'
         * return 1 if it worked.
         */
-       char buf[20];
-       if (sysfs_get_str(sra, NULL, "sync_action", buf, 20) <= 0)
-               return 0;
-       if (strcmp(buf, "idle\n") != 0 &&
-           strcmp(buf, "frozen\n") != 0)
-               return -1;
-       if (sysfs_set_str(sra, NULL, "sync_action", "frozen") < 0)
-               return 0;
-       return 1;
+       if (st->ss->external)
+               return freeze_container(st);
+       else {
+               struct mdinfo *sra = sysfs_read(-1, st->devnum, GET_VERSION);
+               int err;
+
+               if (!sra)
+                       return -1;
+               err = sysfs_freeze_array(sra);
+               sysfs_free(sra);
+               return err;
+       }
 }
 
-void unfreeze_array(struct mdinfo *sra, int frozen)
+static void unfreeze(struct supertype *st, int frozen)
 {
        /* If 'frozen' is 1, unfreeze the array */
-       if (frozen > 0)
-               sysfs_set_str(sra, NULL, "sync_action", "idle");
+       if (frozen <= 0)
+               return;
+
+       if (st->ss->external)
+               return unfreeze_container(st);
+       else {
+               struct mdinfo *sra = sysfs_read(-1, st->devnum, GET_VERSION);
+
+               if (sra)
+                       sysfs_set_str(sra, NULL, "sync_action", "idle");
+               else
+                       fprintf(stderr, Name ": failed to unfreeze array\n");
+               sysfs_free(sra);
+       }
 }
 
-void wait_reshape(struct mdinfo *sra)
+static void wait_reshape(struct mdinfo *sra)
 {
        int fd = sysfs_get_fd(sra, NULL, "sync_action");
        char action[20];
 
-       do {
+       if (fd < 0)
+               return;
+
+       while  (sysfs_fd_get_str(fd, action, 20) > 0 &&
+               strncmp(action, "reshape", 7) == 0) {
                fd_set rfds;
                FD_ZERO(&rfds);
                FD_SET(fd, &rfds);
                select(fd+1, NULL, NULL, &rfds, NULL);
-               
-               if (sysfs_fd_get_str(fd, action, 20) < 0) {
-                       close(fd);
-                       return;
+       }
+       close(fd);
+}
+
+/* Hand a reshape request to the metadata handler.
+ * Native (non-external) metadata has nothing extra to check: returns 0.
+ * External metadata lacking either the reshape_super or the
+ * manage_reshape handler is rejected with a message and 1.
+ * Otherwise the handler's own result is returned.
+ */
+static int reshape_super(struct supertype *st, long long size, int level,
+                        int layout, int chunksize, int raid_disks,
+                        char *backup_file, char *dev, int verbose)
+{
+       int can_reshape;
+
+       if (st->ss->external == 0)
+               return 0;
+
+       can_reshape = st->ss->reshape_super && st->ss->manage_reshape;
+       if (can_reshape)
+               return st->ss->reshape_super(st, size, level, layout,
+                                            chunksize, raid_disks,
+                                            backup_file, dev, verbose);
+
+       fprintf(stderr, Name ": %s metadata does not support reshape\n",
+               st->ss->name);
+       return 1;
+}
+
+/* For external metadata only: when st->update_tail is set, flush the
+ * pending metadata updates and point update_tail back at st->updates;
+ * otherwise call the handler's sync_metadata.  Native metadata: no-op.
+ */
+static void sync_metadata(struct supertype *st)
+{
+       if (!st->ss->external)
+               return;
+
+       if (!st->update_tail) {
+               st->ss->sync_metadata(st);
+               return;
+       }
+
+       flush_metadata_updates(st);
+       st->update_tail = &st->updates;
+}
+
+static int subarray_set_num(char *container, struct mdinfo *sra, char *name, int n)
+{
+       /* Write sysfs attribute 'name' = n for an external-metadata
+        * subarray, coping with EAGAIN.
+        *
+        * The kernel may have to wait for mdmon to mark the array active
+        * before it can do the allocations/writeback needed to prepare
+        * the reshape (md_allow_write()).  safe_mode_delay is switched
+        * off for the duration so that array_state cannot go clean again
+        * before the next write to raid_disks / stripe_cache_size, and
+        * is restored afterwards.
+        *
+        * Returns the result of the (last) sysfs write, or -1 if the
+        * current safe_mode_delay could not be read.
+        */
+       char saved_delay[50];
+       int needs_allow_write;
+       int rc;
+
+       /* only 'raid_disks' and 'stripe_cache_size' trigger md_allow_write */
+       needs_allow_write = strcmp(name, "raid_disks") == 0 ||
+                           strcmp(name, "stripe_cache_size") == 0;
+       if (!needs_allow_write)
+               return sysfs_set_num(sra, NULL, name, n);
+
+       if (sysfs_get_str(sra, NULL, "safe_mode_delay",
+                         saved_delay, sizeof(saved_delay)) <= 0)
+               return -1;
+
+       sysfs_set_num(sra, NULL, "safe_mode_delay", 0);
+       rc = sysfs_set_num(sra, NULL, name, n);
+       if (rc < 0 && errno == EAGAIN) {
+               /* one retry after pinging the monitor; a second EAGAIN
+                * means the monitor is not active, so give up
+                */
+               ping_monitor(container);
+               rc = sysfs_set_num(sra, NULL, name, n);
+       }
+       sysfs_set_str(sra, NULL, "safe_mode_delay", saved_delay);
+       return rc;
+}
+
+/* Write 0 to suspend_hi, suspend_lo, sync_min and sync_max (in that
+ * order) and then "reshape" to sync_action.  The first write that yields
+ * a non-zero result stops the sequence and that result is returned;
+ * otherwise the result of the sync_action write is returned.
+ */
+int start_reshape(struct mdinfo *sra)
+{
+       int err;
+
+       err = sysfs_set_num(sra, NULL, "suspend_hi", 0);
+       if (err)
+               return err;
+       err = sysfs_set_num(sra, NULL, "suspend_lo", 0);
+       if (err)
+               return err;
+       err = sysfs_set_num(sra, NULL, "sync_min", 0);
+       if (err)
+               return err;
+       err = sysfs_set_num(sra, NULL, "sync_max", 0);
+       if (err)
+               return err;
+
+       return sysfs_set_str(sra, NULL, "sync_action", "reshape");
+}
+
+void abort_reshape(struct mdinfo *sra)
+{      /* Put sync_action back to "idle" and reset the suspend/sync window
+        * attributes via sysfs.  Every write is best-effort: no return
+        * value is checked and nothing is reported to the caller.
+        */
+       sysfs_set_str(sra, NULL, "sync_action", "idle");
+       sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL); /* NOTE(review): lo is raised to the maximum before hi is cleared - presumably an ordering requirement of the kernel; also confirm sysfs_set_num takes a 64-bit value */
+       sysfs_set_num(sra, NULL, "suspend_hi", 0);
+       sysfs_set_num(sra, NULL, "suspend_lo", 0);
+       sysfs_set_num(sra, NULL, "sync_min", 0);
+       sysfs_set_str(sra, NULL, "sync_max", "max"); /* start_reshape() writes 0 here; this writes "max" instead */
+}
+
+static int reshape_container_raid_disks(struct supertype *st,
+                                       char *container, int raid_disks)
+{
+       /* For each subarray of 'container': switch to a raid level that
+        * can support the reshape, enlarge the stripe cache when it is
+        * too small, set raid_disks, add the spares the metadata chose
+        * and (for levels above 1) start the reshape.
+        *
+        * Returns the number of subarrays whose raid_disks was set, or,
+        * after printing a message, the non-zero failure value of the
+        * step that failed.  Returns -1 if /proc/mdstat cannot be read.
+        * Processing stops at the first subarray that fails.
+        */
+       struct mdstat_ent *ent, *e;
+       int changed = 0, rv = 0, err = 0;
+
+       ent = mdstat_read(1, 0);
+       if (!ent) {
+               fprintf(stderr, Name ": unable to read /proc/mdstat\n");
+               return -1;
+       }
+
+       for (e = ent; e; e = e->next) {
+               struct mdinfo *sub, *info;
+               unsigned int cache;
+               int level, takeover_delta = 0;
+               int parity_disks = 1;
+               unsigned int odata;
+               unsigned long blocks;
+               char *subarray;
+
+               if (!is_container_member(e, container))
+                       continue;
+
+               rv = -1;
+               /* the subarray name follows the first '/' found 10 bytes
+                * into metadata_version (presumably just past an
+                * "external:/" prefix); never do arithmetic on a NULL
+                * strchr() result
+                */
+               subarray = strchr(e->metadata_version+10, '/');
+               if (!subarray)
+                       break;
+               subarray++;
+
+               level = map_name(pers, e->level);
+               if (level == 0) {
+                       sub = sysfs_read(-1, e->devnum, GET_VERSION);
+                       if (!sub)
+                               break;
+                       /* metadata records 'orig_level' */
+                       rv = sysfs_set_num(sub, NULL, "level", 4);
+                       if (rv < 0) {
+                               /* save errno first, then release 'sub'
+                                * so this error path does not leak it
+                                */
+                               err = errno;
+                               sysfs_free(sub);
+                               break;
+                       }
+                       /* we want spares to be used for capacity
+                        * expansion, not rebuild
+                        */
+                       takeover_delta = 1;
+
+                       sysfs_free(sub);
+                       level = 4;
+               }
+               rv = -1;
+               sub = NULL;
+               switch (level) {
+               default:
+                       /* unsupported level: rv stays -1 */
+                       break;
+               case 6:
+                       parity_disks++;
+                       /* fall through */
+               case 4:
+               case 5:
+                       sub = sysfs_read(-1, e->devnum, GET_CHUNK|GET_CACHE|GET_DISKS);
+                       if (!sub)
+                               break;
+                       cache = (sub->array.chunk_size / 4096) * 4;
+                       odata = sub->array.raid_disks - parity_disks;
+                       blocks = compute_backup_blocks(sub->array.chunk_size,
+                                                      sub->array.chunk_size,
+                                                      raid_disks - parity_disks,
+                                                      odata);
+                       if (cache < blocks / 8 / odata + 16)
+                               /* Make it big enough to hold 'blocks' */
+                               cache = blocks / 8 / odata + 16;
+                       /* a stripe cache that is already large enough is
+                        * success, not failure: only a failed resize may
+                        * leave rv non-zero
+                        */
+                       rv = 0;
+                       if (cache > sub->cache_size)
+                               rv = subarray_set_num(container, sub,
+                                                     "stripe_cache_size", cache);
+                       if (rv) {
+                               err = errno;
+                               break;
+                       }
+                       rv = -1;
+                       /* fall through */
+               case 1:
+                       if (!sub)
+                               sub = sysfs_read(-1, e->devnum, GET_VERSION);
+                       if (!sub)
+                               break;
+
+                       rv = subarray_set_num(container, sub, "raid_disks",
+                                             raid_disks + takeover_delta);
+                       if (rv)
+                               err = errno;
+                       else
+                               changed++;
+                       break;
+               }
+
+               /* add the devices that were chosen */
+               info = st->ss->container_content(st, subarray);
+               if (info) {
+                       struct mdinfo *d;
+                       for (d = info->devs; d; d = d->next) {
+                               if (d->disk.state == 0 &&
+                                   d->disk.raid_disk >= 0) {
+                                       /* This is a spare that wants to
+                                        * be part of the array.
+                                        */
+                                       add_disk(-1, st, info, d);
+                               }
+                       }
                 }
-       } while  (strncmp(action, "reshape", 7) == 0);
+               sysfs_free(info);
+
+               if (!rv && level > 1)
+                       start_reshape(sub);
+               sysfs_free(sub);
+               if (rv)
+                       break;
+       }
+       free_mdstat(ent);
+       if (rv) {
+               fprintf(stderr, Name
+                       ": failed to initiate container reshape%s%s\n",
+                       err ? ": " : "", err ? strerror(err) : "");
+               return rv;
+       }
+
+       return changed;
 }
-                       
-               
+
+static void revert_container_raid_disks(struct supertype *st, int fd, char *container)
+{
+       /* we failed to prepare all subarrays in the container for
+        * reshape, so cancel the changes and restore the nominal raid
+        * level
+        *
+        * For every member of 'container' listed in /proc/mdstat the
+        * level and raid_disks recorded in the metadata (as returned by
+        * container_content) are written back through sysfs and
+        * abort_reshape() is called.  Failures are reported on stderr;
+        * nothing is returned to the caller.
+        */
+       struct mdstat_ent *ent, *e;
+
+       ent = mdstat_read(0, 0);
+       if (!ent) {
+               fprintf(stderr, Name
+                       ": failed to read /proc/mdstat while aborting reshape\n");
+               return;
+       }
+
+       if (st->ss->load_container(st, fd, NULL)) {
+               fprintf(stderr, Name
+                       ": failed read metadata while aborting reshape\n");
+               /* the mdstat list is ours to release on this path too */
+               free_mdstat(ent);
+               return;
+       }
+
+       for (e = ent; e; e = e->next) {
+               int level_fixed = 0, disks_fixed = 0;
+               struct mdinfo *sub, *prev;
+               char *subarray;
+
+               if (!is_container_member(e, container))
+                       continue;
+
+               subarray = to_subarray(e, container);
+               prev = st->ss->container_content(st, subarray);
+               if (!prev) {
+                       /* without the recorded geometry there is nothing
+                        * to restore to; report it and try the next one
+                        */
+                       fprintf(stderr, Name
+                               ": failed to restore %s: no metadata for"
+                               " subarray %s\n", e->dev, subarray);
+                       continue;
+               }
+
+               /* changing level might change raid_disks so we do it
+                * first and then check if raid_disks still needs fixing
+                */
+               if (map_name(pers, e->level) != prev->array.level) {
+                       sub = sysfs_read(-1, e->devnum, GET_VERSION);
+                       if (sub &&
+                           !sysfs_set_num(sub, NULL, "level", prev->array.level))
+                               level_fixed = 1;
+                       sysfs_free(sub);
+               } else
+                       level_fixed = 1;
+
+               sub = sysfs_read(-1, e->devnum, GET_DISKS);
+               if (sub && sub->array.raid_disks != prev->array.raid_disks) {
+                       if (!subarray_set_num(container, sub, "raid_disks",
+                                             prev->array.raid_disks))
+                               disks_fixed = 1;
+               } else if (sub)
+                       disks_fixed = 1;
+
+               if (sub)
+                       abort_reshape(sub);
+               sysfs_free(sub);
+
+               if (!disks_fixed || !level_fixed)
+                       fprintf(stderr, Name
+                               ": failed to restore %s to a %d-disk %s array\n",
+                               e->dev, prev->array.raid_disks,
+                               map_num(pers, prev->array.level));
+               /* released the same way the container_content() result
+                * is released in reshape_container_raid_disks()
+                */
+               sysfs_free(prev);
+       }
+       st->ss->free_super(st);
+       free_mdstat(ent);
+}
+
+int remove_disks_on_raid10_to_raid0_takeover(struct supertype *st,
+                                            struct mdinfo *sra,
+                                            int layout)
+{      /* Keep one usable device for every group of 'nr_of_copies'
+        * consecutive slots and remove all the others from the array.
+        * Returns 0 when every group had a device to keep and the rest
+        * were removed; returns 1, leaving the array untouched and
+        * sra->devs holding all devices again (possibly reordered),
+        * when some group had no usable device.
+        * 'st' is not used here.
+        */
+       int nr_of_copies;
+       struct mdinfo *remaining;
+       int slot;
+
+       nr_of_copies = layout & 0xff; /* copy count is the low byte of 'layout' */
+
+       remaining = sra->devs;
+       sra->devs = NULL;
+       /* for each 'copy', select one device and remove from the list.
+        * Selected devices are moved from 'remaining' onto sra->devs.
+        */
+       for (slot = 0; slot < sra->array.raid_disks; slot += nr_of_copies) {
+               struct mdinfo **diskp;
+               int found = 0;
+
+               /* Find a working device to keep: its raid_disk must lie
+                * in [slot, slot + nr_of_copies) and it must be in sync,
+                * not faulty and not removed.
+                */
+               for (diskp =  &remaining; *diskp ; diskp = &(*diskp)->next) {
+                       struct mdinfo *disk = *diskp;
+
+                       if (disk->disk.raid_disk < slot)
+                               continue;
+                       if (disk->disk.raid_disk >= slot + nr_of_copies)
+                               continue;
+                       if (disk->disk.state & (1<<MD_DISK_REMOVED))
+                               continue;
+                       if (disk->disk.state & (1<<MD_DISK_FAULTY))
+                               continue;
+                       if (!(disk->disk.state & (1<<MD_DISK_SYNC)))
+                               continue;
+
+                       /* We have found a good disk to use! */
+                       *diskp = disk->next; /* unlink from 'remaining' */
+                       disk->next = sra->devs; /* push onto the keep list */
+                       sra->devs = disk;
+                       found = 1;
+                       break;
+               }
+               if (!found)
+                       break; /* leaves slot < raid_disks for the check below */
+       }
+
+       if (slot < sra->array.raid_disks) {
+               /* didn't find all slots: append the kept devices to the
+                * end of 'remaining' and hand the whole list back
+                */
+               struct mdinfo **e;
+               e = &remaining;
+               while (*e)
+                       e = &(*e)->next;
+               *e = sra->devs;
+               sra->devs = remaining;
+               return 1;
+       }
+
+       /* Remove all 'remaining' devices from the array.  The sysfs
+        * writes are not checked; each device is also flagged as removed
+        * and not in sync in memory and put back on sra->devs.
+        */
+       while (remaining) {
+               struct mdinfo *sd = remaining;
+               remaining = sd->next;
+
+               sysfs_set_str(sra, sd, "state", "faulty");
+               sysfs_set_str(sra, sd, "slot", "none");
+               sysfs_set_str(sra, sd, "state", "remove");
+               sd->disk.state |= (1<<MD_DISK_REMOVED);
+               sd->disk.state &= ~(1<<MD_DISK_SYNC);
+               sd->next = sra->devs;
+               sra->devs = sd;
+       }
+       return 0;
+}
+
+/* Close every descriptor in fdlist[0 .. size-1] that is not negative,
+ * then free both the descriptor array and the offsets array.
+ */
+void reshape_free_fdlist(int *fdlist,
+                        unsigned long long *offsets,
+                        int size)
+{
+       int *fdp = fdlist;
+       int left;
+
+       for (left = size; left > 0; left--, fdp++) {
+               if (*fdp < 0)
+                       continue;
+               close(*fdp);
+       }
+
+       free(offsets);
+       free(fdlist);
+}
+
+/* Open the component devices of 'sra' for a reshape.
+ * fdlist[0 .. nrdisks] is first set to -1.  Each in-sync device is opened
+ * read-only into the entry indexed by its raid_disk, with its data offset
+ * in bytes stored in offsets[].  When no backup file is given, every
+ * other non-faulty device is opened read-write into consecutive entries
+ * starting at 'raid_disks', with an offset 'blocks' + 8 sectors before
+ * the end of its data area.
+ * Returns the index one past the last spare entry used ('raid_disks'
+ * when there were none), or -1 after printing a message when a device
+ * cannot be opened; entries opened so far are left for the caller.
+ */
+int reshape_prepare_fdlist(char *devname,
+                          struct mdinfo *sra,
+                          int raid_disks,
+                          int nrdisks,
+                          unsigned long blocks,
+                          char *backup_file,
+                          int *fdlist,
+                          unsigned long long *offsets)
+{
+       struct mdinfo *sd;
+       int next_spare = raid_disks;
+       int i;
+
+       for (i = 0; i <= nrdisks; i++)
+               fdlist[i] = -1;
+
+       for (sd = sra->devs; sd; sd = sd->next) {
+               char *dn;
+               int is_spare;
+               int slot;
+
+               if (sd->disk.state & (1<<MD_DISK_FAULTY))
+                       continue;
+
+               is_spare = !(sd->disk.state & (1<<MD_DISK_SYNC));
+               /* spares are only needed when there is no backup file */
+               if (is_spare && backup_file != NULL)
+                       continue;
+
+               dn = map_dev(sd->disk.major, sd->disk.minor, 1);
+               if (is_spare) {
+                       slot = next_spare;
+                       fdlist[slot] = dev_open(dn, O_RDWR);
+                       offsets[slot] = (sd->data_offset + sra->component_size - blocks - 8)*512;
+               } else {
+                       slot = sd->disk.raid_disk;
+                       fdlist[slot] = dev_open(dn, O_RDONLY);
+                       offsets[slot] = sd->data_offset*512;
+               }
+
+               if (fdlist[slot] < 0) {
+                       fprintf(stderr, Name ": %s: cannot open component %s\n",
+                               devname, dn ? dn : "-unknown-");
+                       return -1;
+               }
+               if (is_spare)
+                       next_spare++;
+       }
+
+       return next_spare;
+}
+
+int reshape_open_backup_file(char *backup_file,
+                            int fd,
+                            char *devname,
+                            long blocks,
+                            int *fdlist,
+                            unsigned long long *offsets)
+{
+       /* Create 'backup_file' (it must not exist yet), refuse it if it
+        * lives on the array open as 'fd', and pre-fill it with
+        * 'blocks' + 1 zeroed 512-byte sectors so that it is known to be
+        * large enough.  The descriptor is stored in *fdlist and
+        * *offsets is set to 8 sectors.
+        *
+        * Return 1 on success, 0 on any form of failure.  On failure the
+        * descriptor opened here is closed again and *fdlist is left
+        * negative, so callers never see a stale or leaked descriptor.
+        * The file itself is not removed.
+        */
+       char buf[512];
+       struct stat stb;
+       unsigned int dev;
+       long i;         /* same type as 'blocks', which bounds it */
+
+       *fdlist = open(backup_file, O_RDWR|O_CREAT|O_EXCL,
+                      S_IRUSR | S_IWUSR);
+       *offsets = 8 * 512;
+       if (*fdlist < 0) {
+               fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
+                       devname, backup_file, strerror(errno));
+               return 0;
+       }
+       /* Guard against backup file being on array device.
+        * If array is partitioned or if LVM etc is in the
+        * way this will not notice, but it is better than
+        * nothing.
+        */
+       fstat(*fdlist, &stb);
+       dev = stb.st_dev;
+       fstat(fd, &stb);
+       if (stb.st_rdev == dev) {
+               fprintf(stderr, Name ": backup file must NOT be"
+                       " on the array being reshaped.\n");
+               goto fail;
+       }
+
+       memset(buf, 0, 512);
+       for (i = 0; i < blocks + 1; i++) {
+               if (write(*fdlist, buf, 512) != 512) {
+                       fprintf(stderr, Name ": %s: cannot create"
+                               " backup file %s: %s\n",
+                               devname, backup_file, strerror(errno));
+                       goto fail;
+               }
+       }
+       if (fsync(*fdlist) != 0) {
+               fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
+                       devname, backup_file, strerror(errno));
+               goto fail;
+       }
+
+       return 1;
+
+fail:
+       /* messages were printed above, so close() may clobber errno */
+       close(*fdlist);
+       *fdlist = -1;
+       return 0;
+}
+
+unsigned long compute_backup_blocks(int nchunk, int ochunk,
+                                   unsigned int ndata, unsigned int odata)
+{
+       unsigned long a, b, x, y, rem;
+       /* So how much do we need to backup.
+        * We need an amount of data which is both a whole number of
+        * old stripes and a whole number of new stripes.
+        * So LCM for (chunksize*datadisks), in 512-byte sectors.
+        *
+        * nchunk/ochunk are the new/old chunk sizes in bytes and
+        * ndata/odata the new/old number of data disks.
+        * Returns 0 when either stripe size is 0, since no common
+        * multiple exists (a subtractive GCD would never terminate).
+        */
+       a = (unsigned long)(ochunk/512) * odata;
+       b = (unsigned long)(nchunk/512) * ndata;
+       if (a == 0 || b == 0)
+               return 0;
+
+       /* Find GCD (Euclid); it ends up in 'x' */
+       x = a;
+       y = b;
+       while (y != 0) {
+               rem = x % y;
+               x = y;
+               y = rem;
+       }
+
+       /* LCM == product / GCD; divide first so the intermediate value
+        * never exceeds the result itself
+        */
+       return a / x * b;
+}
+
+
 int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                 long long size,
-                int level, char *layout_str, int chunksize, int raid_disks)
+                int level, char *layout_str, int chunksize, int raid_disks,
+                int force)
 {
        /* Make some changes in the shape of an array.
         * The kernel must support the change.
@@ -501,23 +1101,26 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
        char *c;
        int rv = 0;
        struct supertype *st;
+       char *subarray = NULL;
 
        int nchunk, ochunk;
        int nlayout, olayout;
        int ndisks, odisks;
-       int ndata, odata;
+       unsigned int ndata, odata;
        int orig_level = UnSet;
        char alt_layout[40];
        int *fdlist;
        unsigned long long *offsets;
-       int d, i;
+       int d;
        int nrdisks;
        int err;
        int frozen;
-       unsigned long a,b, blocks, stripes;
-       int cache;
+       unsigned long blocks, stripes;
+       unsigned long cache;
        unsigned long long array_size;
        int changed = 0;
+       char *container = NULL;
+       int cfd = -1;
        int done;
 
        struct mdinfo *sra;
@@ -545,15 +1148,99 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        "       Please use a newer kernel\n");
                return 1;
        }
-       sra = sysfs_read(fd, 0, GET_LEVEL);
-       if (sra)
-               frozen = freeze_array(sra);
-       else {
+
+       st = super_by_fd(fd, &subarray);
+       if (!st) {
+               fprintf(stderr, Name ": Unable to determine metadata format for %s\n", devname);
+               return 1;
+       }
+       if (raid_disks > st->max_devs) {
+               fprintf(stderr, Name ": Cannot increase raid-disks on this array"
+                       " beyond %d\n", st->max_devs);
+               return 1;
+       }
+
+       /* in the external case we need to check that the requested reshape is
+        * supported, and perform an initial check that the container holds the
+        * pre-requisite spare devices (mdmon owns final validation)
+        */
+       if (st->ss->external) {
+               int container_dev;
+               int rv;
+
+               if (subarray) {
+                       container_dev = st->container_dev;
+                       cfd = open_dev_excl(st->container_dev);
+               } else if (size >= 0 || layout_str != NULL || chunksize != 0 ||
+                          level != UnSet) {
+                       fprintf(stderr,
+                               Name ": %s is a container, only 'raid-devices' can be changed\n",
+                               devname);
+                       return 1;
+               } else {
+                       container_dev = st->devnum;
+                       close(fd);
+                       cfd = open_dev_excl(st->devnum);
+                       fd = cfd;
+               }
+               if (cfd < 0) {
+                       fprintf(stderr, Name ": Unable to open container for %s\n",
+                               devname);
+                       free(subarray);
+                       return 1;
+               }
+
+               container = devnum2devname(st->devnum);
+               if (!container) {
+                       fprintf(stderr, Name ": Could not determine container name\n");
+                       free(subarray);
+                       return 1;
+               }
+
+               if (subarray)
+                       rv = st->ss->load_container(st, cfd, NULL);
+               else
+                       rv = st->ss->load_super(st, cfd, NULL);
+               if (rv) {
+                       fprintf(stderr, Name ": Cannot read superblock for %s\n",
+                               devname);
+                       free(subarray);
+                       return 1;
+               }
+
+               if (mdmon_running(container_dev))
+                       st->update_tail = &st->updates;
+       } 
+
+       if (raid_disks > array.raid_disks &&
+           array.spare_disks < (raid_disks - array.raid_disks) &&
+           !force) {
+               fprintf(stderr,
+                       Name ": Need %d spare%s to avoid degraded array,"
+                       " and only have %d.\n"
+                       "       Use --force to over-ride this check.\n",
+                       raid_disks - array.raid_disks, 
+                       raid_disks - array.raid_disks == 1 ? "" : "s", 
+                       array.spare_disks);
+               return 1;
+       }
+
+       sra = sysfs_read(fd, 0, GET_LEVEL | GET_DISKS | GET_DEVS | GET_STATE);
+       if (sra) {
+               if (st->ss->external && subarray == NULL) {
+                       array.level = LEVEL_CONTAINER;
+                       sra->array.level = LEVEL_CONTAINER;
+               }
+       } else {
                fprintf(stderr, Name ": failed to read sysfs parameters for %s\n",
                        devname);
                return 1;
        }
-       if (frozen < 0) {
+       frozen = freeze(st);
+       if (frozen < -1) {
+               /* freeze() already spewed the reason */
+               return 1;
+       } else if (frozen < 0) {
                fprintf(stderr, Name ": %s is performing resync/recovery and cannot"
                        " be reshaped\n", devname);
                return 1;
@@ -561,6 +1248,13 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
 
        /* ========= set size =============== */
        if (size >= 0 && (size == 0 || size != array.size)) {
+               long long orig_size = array.size;
+
+               if (reshape_super(st, size, UnSet, UnSet, 0, 0, NULL, devname, !quiet)) {
+                       rv = 1;
+                       goto release;
+               }
+               sync_metadata(st);
                array.size = size;
                if (array.size != size) {
                        /* got truncated to 32bit, write to
@@ -575,6 +1269,11 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        rv = ioctl(fd, SET_ARRAY_INFO, &array);
                if (rv != 0) {
                        int err = errno;
+
+                       /* restore metadata */
+                       if (reshape_super(st, orig_size, UnSet, UnSet, 0, 0,
+                                         NULL, devname, !quiet) == 0)
+                               sync_metadata(st);
                        fprintf(stderr, Name ": Cannot set device size for %s: %s\n",
                                devname, strerror(err));
                        if (err == EBUSY && 
@@ -591,12 +1290,31 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        fprintf(stderr, Name ": component size of %s has been set to %lluK\n",
                                devname, size);
                changed = 1;
-       } else {
+       } else if (array.level != LEVEL_CONTAINER) {
                size = get_component_size(fd)/2;
                if (size == 0)
                        size = array.size;
        }
 
+       /* ========= check for Raid10 -> Raid0 conversion ===============
+        * current implementation assumes that the following conditions must be met:
+        * - far_copies == 1
+        * - near_copies == 2
+        */
+       if (level == 0 && array.level == 10 &&
+           array.layout == ((1 << 8) + 2) && !(array.raid_disks & 1)) {
+               int err;
+               err = remove_disks_on_raid10_to_raid0_takeover(st, sra, array.layout);
+               if (err) {
+                       dprintf(Name": Array cannot be reshaped\n");
+                       if (container)
+                               free(container);
+                       if (cfd > -1)
+                               close(cfd);
+                       return 1;
+               }
+       }
+
        /* ======= set level =========== */
        if (level != UnSet && level != array.level) {
                /* Trying to change the level.
@@ -674,11 +1392,48 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        } else
                                layout_str = "parity-last";
                } else {
+                       /* Level change is a simple takeover.  In the external
+                        * case we don't check with the metadata handler until
+                        * we establish what the final layout will be.  If the
+                        * level change is disallowed we will revert to
+                        * orig_level without disturbing the metadata, otherwise
+                        * we will send an update.
+                        */
                        c = map_num(pers, level);
                        if (c == NULL) {
                                rv = 1;/* not possible */
                                goto release;
                        }
+                       if (!force) {
+                               /* Need to check there are enough spares */
+                               int spares_needed = 0;
+                               switch (array.level * 16 + level) {
+                               case 0x05:
+                                       spares_needed = 1; break;
+                               case 0x06:
+                                       spares_needed = 2; break;
+                               case 0x15:
+                                       spares_needed = 1; break;
+                               case 0x16:
+                                       spares_needed = 2; break;
+                               case 0x56:
+                                       spares_needed = 1; break;
+                               }
+                               if (raid_disks > array.raid_disks)
+                                       spares_needed += raid_disks-array.raid_disks;
+                               if (spares_needed > array.spare_disks) {
+                                       fprintf(stderr,
+                                               Name ": Need %d spare%s to avoid"
+                                               " degraded array, and only have %d.\n"
+                                               "       Use --force to over-ride"
+                                               " this check.\n",
+                                               spares_needed,
+                                               spares_needed == 1 ? "" : "s", 
+                                               array.spare_disks);
+                                       rv = 1;
+                                       goto release;
+                               }
+                       }
                        err = sysfs_set_str(sra, NULL, "level", c);
                        if (err) {
                                err = errno;
@@ -706,9 +1461,9 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
 
        /* ========= set shape (chunk_size / layout / ndisks)  ============== */
        /* Check if layout change is a no-op */
-       if (layout_str) switch(array.level) {
+       switch (array.level) {
        case 5:
-               if (array.layout == map_name(r5layout, layout_str))
+               if (layout_str && array.layout == map_name(r5layout, layout_str))
                        layout_str = NULL;
                break;
        case 6:
@@ -724,8 +1479,9 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        rv = 1;
                        goto release;
                }
-               if (strcmp(layout_str, "normalise") == 0 ||
-                   strcmp(layout_str, "normalize") == 0) {
+               if (layout_str &&
+                   (strcmp(layout_str, "normalise") == 0 ||
+                    strcmp(layout_str, "normalize") == 0)) {
                        char *hyphen;
                        strcpy(alt_layout, map_num(r6layout, array.layout));
                        hyphen = strrchr(alt_layout, '-');
@@ -735,7 +1491,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        }
                }
 
-               if (array.layout == map_name(r6layout, layout_str))
+               if (layout_str && array.layout == map_name(r6layout, layout_str))
                        layout_str = NULL;
                if (layout_str && strcmp(layout_str, "preserve") == 0)
                        layout_str = NULL;
@@ -744,6 +1500,11 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
        if (layout_str == NULL
            && (chunksize == 0 || chunksize*1024 == array.chunk_size)
            && (raid_disks == 0 || raid_disks == array.raid_disks)) {
+               if (reshape_super(st, -1, level, UnSet, 0, 0, NULL, devname, !quiet)) {
+                       rv = 1;
+                       goto release;
+               }
+               sync_metadata(st);
                rv = 0;
                if (level != UnSet && level != array.level) {
                        /* Looks like this level change doesn't need
@@ -759,23 +1520,75 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                                        if (err == EBUSY && 
                                            (array.state & (1<<MD_SB_BITMAP_PRESENT)))
                                                fprintf(stderr, "       Bitmap must be removed before level can be changed\n");
+                                       rv = 1;
                                }
                        }
                } else if (!changed && !quiet)
                        fprintf(stderr, Name ": %s: no change requested\n",
                                devname);
+
+               if (st->ss->external && !mdmon_running(st->container_dev) &&
+                   level > 0) {
+                       start_mdmon(st->container_dev);
+                       ping_monitor(container);
+               }
                goto release;
        }
 
        c = map_num(pers, array.level);
        if (c == NULL) c = "-unknown-";
-       switch(array.level) {
+       switch (array.level) {
        default: /* raid0, linear, multipath cannot be reconfigured */
                fprintf(stderr, Name ": %s array %s cannot be reshaped.\n",
                        c, devname);
+               /* TODO raid0 raiddisks can be reshaped via raid4 */
                rv = 1;
                break;
+       case LEVEL_CONTAINER: {
+               int count;
+
+               /* double check that we are not changing anything but raid_disks */
+               if (size >= 0 || layout_str != NULL || chunksize != 0 || level != UnSet) {
+                       fprintf(stderr,
+                               Name ": %s is a container, only 'raid-devices' can be changed\n",
+                               devname);
+                       rv = 1;
+                       goto release;
+               }
+
+               if (reshape_super(st, -1, UnSet, UnSet, 0, raid_disks,
+                                 backup_file, devname, !quiet)) {
+                       rv = 1;
+                       goto release;
+               }
+
+               count = reshape_container_raid_disks(st, container, raid_disks);
+               if (count < 0) {
+                       revert_container_raid_disks(st, fd, container);
+                       rv = 1;
+                       goto release;
+               } else if (count == 0) {
+                       if (!quiet)
+                               fprintf(stderr, Name
+                                       ": no active subarrays to reshape\n");
+                       goto release;
+               }
+
+               sync_metadata(st);
+               if (!mdmon_running(st->devnum)) {
+                       start_mdmon(st->devnum);
+                       ping_monitor(container);
+                       st->update_tail = &st->updates;
+               }
+
+               /* give mdmon a chance to allocate spares */
+               ping_manager(container);
 
+               /* manage_reshape takes care of releasing the array(s) */
+               st->ss->manage_reshape(st, backup_file);
+               frozen = 0;
+               goto release;
+       }
        case LEVEL_FAULTY: /* only 'layout' change is permitted */
 
                if (chunksize  || raid_disks) {
@@ -789,7 +1602,6 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
 
                array.layout = parse_layout_faulty(layout_str);
                if (array.layout < 0) {
-                       int rv;
                        fprintf(stderr, Name ": %s: layout %s not understood for 'faulty' array\n",
                                devname, layout_str);
                        rv = 1;
@@ -812,6 +1624,12 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        break;
                }
                if (raid_disks > 0) {
+                       if (reshape_super(st, -1, UnSet, UnSet, 0, raid_disks,
+                                         NULL, devname, !quiet)) {
+                               rv = 1;
+                               goto release;
+                       }
+                       sync_metadata(st);
                        array.raid_disks = raid_disks;
                        if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
                                fprintf(stderr, Name ": Cannot set raid-devices for %s: %s\n",
@@ -829,7 +1647,10 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                 * layout/chunksize/raid_disks can be changed
                 * though the kernel may not support it all.
                 */
-               st = super_by_fd(fd);
+               if (subarray) {
+                       fprintf(stderr, Name ": Cannot reshape subarrays yet\n");
+                       break;
+               }
 
                /*
                 * There are three possibilities.
@@ -923,7 +1744,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
 
                /* Check that we can hold all the data */
                get_dev_size(fd, NULL, &array_size);
-               if (ndata * size < (array_size/1024)) {
+               if (ndata * (unsigned long long)size < (array_size/1024)) {
                        fprintf(stderr, Name ": this change will reduce the size of the array.\n"
                                "       use --grow --array-size first to truncate array.\n"
                                "       e.g. mdadm --grow %s --array-size %llu\n",
@@ -932,22 +1753,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        break;
                }
 
-               /* So how much do we need to backup.
-                * We need an amount of data which is both a whole number of
-                * old stripes and a whole number of new stripes.
-                * So LCM for (chunksize*datadisks).
-                */
-               a = (ochunk/512) * odata;
-               b = (nchunk/512) * ndata;
-               /* Find GCD */
-               while (a != b) {
-                       if (a < b)
-                               b -= a;
-                       if (b < a)
-                               a -= b;
-               }
-               /* LCM == product / GCD */
-               blocks = (ochunk/512) * (nchunk/512) * odata * ndata / a;
+               blocks = compute_backup_blocks(nchunk, ochunk, ndata, odata);
 
                sysfs_free(sra);
                sra = sysfs_read(fd, 0,
@@ -979,7 +1785,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        rv = 1;
                        break;
                }
-               nrdisks = array.nr_disks + sra->array.spare_disks;
+               nrdisks = array.raid_disks + sra->array.spare_disks;
                /* Now we need to open all these devices so we can read/write.
                 */
                fdlist = malloc((1+nrdisks) * sizeof(int));
@@ -989,40 +1795,21 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        rv = 1;
                        break;
                }
-               for (d=0; d <= nrdisks; d++)
-                       fdlist[d] = -1;
-               d = array.raid_disks;
-               for (sd = sra->devs; sd; sd=sd->next) {
-                       if (sd->disk.state & (1<<MD_DISK_FAULTY))
-                               continue;
-                       if (sd->disk.state & (1<<MD_DISK_SYNC)) {
-                               char *dn = map_dev(sd->disk.major,
-                                                  sd->disk.minor, 1);
-                               fdlist[sd->disk.raid_disk]
-                                       = dev_open(dn, O_RDONLY);
-                               offsets[sd->disk.raid_disk] = sd->data_offset*512;
-                               if (fdlist[sd->disk.raid_disk] < 0) {
-                                       fprintf(stderr, Name ": %s: cannot open component %s\n",
-                                               devname, dn?dn:"-unknown-");
-                                       rv = 1;
-                                       goto release;
-                               }
-                       } else if (backup_file == NULL) {
-                               /* spare */
-                               char *dn = map_dev(sd->disk.major,
-                                                  sd->disk.minor, 1);
-                               fdlist[d] = dev_open(dn, O_RDWR);
-                               offsets[d] = (sd->data_offset + sra->component_size - blocks - 8)*512;
-                               if (fdlist[d]<0) {
-                                       fprintf(stderr, Name ": %s: cannot open component %s\n",
-                                               devname, dn?dn:"-unknown");
-                                       rv = 1;
-                                       goto release;
-                               }
-                               d++;
-                       }
+
+               d = reshape_prepare_fdlist(devname, sra, array.raid_disks,
+                                          nrdisks, blocks, backup_file,
+                                          fdlist, offsets);
+               if (d < 0) {
+                       rv = 1;
+                       goto release;
                }
                if (backup_file == NULL) {
+                       if (st->ss->external && !st->ss->manage_reshape) {
+                               fprintf(stderr, Name ": %s Grow operation not supported by %s metadata\n",
+                                       devname, st->ss->name);
+                               rv = 1;
+                               break;
+                       }
                        if (ndata <= odata) {
                                fprintf(stderr, Name ": %s: Cannot grow - need backup-file\n",
                                        devname);
@@ -1042,44 +1829,54 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                                break;
                        }
                } else {
-                       /* need to check backup file is large enough */
-                       char buf[512];
-                       fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL,
-                                    S_IRUSR | S_IWUSR);
-                       offsets[d] = 8 * 512;
-                       if (fdlist[d] < 0) {
-                               fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
-                                       devname, backup_file, strerror(errno));
-                               rv = 1;
-                               break;
-                       }
-                       memset(buf, 0, 512);
-                       for (i=0; i < blocks + 1 ; i++) {
-                               if (write(fdlist[d], buf, 512) != 512) {
-                                       fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
-                                               devname, backup_file, strerror(errno));
-                                       rv = 1;
-                                       break;
-                               }
-                       }
-                       if (fsync(fdlist[d]) != 0) {
-                               fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
-                                       devname, backup_file, strerror(errno));
+                       if (!reshape_open_backup_file(backup_file, fd, devname,
+                                                     (signed)blocks,
+                                                     fdlist+d, offsets+d)) {
                                rv = 1;
                                break;
                        }
                        d++;
                }
 
+               /* check that the operation is supported by the metadata */
+               if (reshape_super(st, -1, level, nlayout, nchunk, ndisks,
+                                 backup_file, devname, !quiet)) {
+                       rv = 1;
+                       break;
+               }
+
+               /* ->reshape_super might have chosen some spares from the
+                * container that it wants to be part of the new array.
+                * We can collect them with ->container_content and give
+                * them to the kernel.
+                */
+               if (st->ss->reshape_super && st->ss->container_content) {
+                       struct mdinfo *info =
+                               st->ss->container_content(st, subarray);
+                       struct mdinfo *d;
+
+                       if (info)
+                               for (d = info->devs; d; d = d->next) {
+                                       if (d->disk.state == 0 &&
+                                           d->disk.raid_disk >= 0) {
+                                               /* This is a spare that wants to
+                                                * be part of the array.
+                                                */
+                                               add_disk(fd, st, info, d);
+                                       }
+                               }
+                       sysfs_free(info);
+               }
+
                /* lastly, check that the internal stripe cache is
                 * large enough, or it won't work.
                 */
                
                cache = (nchunk < ochunk) ? ochunk : nchunk;
                cache = cache * 4 / 4096;
-               if (cache < blocks / 8 / odisks + 16)
+               if (cache < blocks / 8 / odata + 16)
                        /* Make it big enough to hold 'blocks' */
-                       cache = blocks / 8 / odisks + 16;
+                       cache = blocks / 8 / odata + 16;
                if (sra->cache_size < cache)
                        sysfs_set_num(sra, NULL, "stripe_cache_size",
                                      cache+1);
@@ -1087,6 +1884,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                 * If only changing raid_disks, use ioctl, else use
                 * sysfs.
                 */
+               sync_metadata(st);
                if (ochunk == nchunk && olayout == nlayout) {
                        array.raid_disks = ndisks;
                        if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
@@ -1135,6 +1933,15 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        break;
                }
 
+               start_reshape(sra);
+               if (st->ss->external) {
+                       /* metadata handler takes it from here */
+                       ping_manager(container);
+                       st->ss->manage_reshape(st, backup_file);
+                       frozen = 0;
+                       break;
+               }
+
                /* set up the backup-super-block.  This requires the
                 * uuid from the array.
                 */
@@ -1158,6 +1965,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        fprintf(stderr, Name ": %s: Cannot find a superblock\n",
                                devname);
                        rv = 1;
+                       abort_reshape(sra);
                        break;
                }
 
@@ -1198,7 +2006,9 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                                                       d - odisks, fdlist+odisks, offsets+odisks);
                        if (backup_file && done)
                                unlink(backup_file);
-                       if (level != UnSet && level != array.level) {
+                       if (!done)
+                               abort_reshape(sra);
+                       else if (level != UnSet && level != array.level) {
                                /* We need to wait for the reshape to finish
                                 * (which will have happened unless odata < ndata)
                                 * and then set the level
@@ -1220,6 +2030,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        fprintf(stderr, Name ": Cannot run child to monitor reshape: %s\n",
                                strerror(errno));
                        rv = 1;
+                       abort_reshape(sra);
                        break;
                default:
                        /* The child will take care of unfreezing the array */
@@ -1236,8 +2047,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                if (c && sysfs_set_str(sra, NULL, "level", c) == 0)
                        fprintf(stderr, Name ": aborting level change\n");
        }
-       if (sra)
-               unfreeze_array(sra, frozen);
+       unfreeze(st, frozen);
        return rv;
 }
 
@@ -1266,7 +2076,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
  */
 
 /* FIXME return status is never checked */
-int grow_backup(struct mdinfo *sra,
+static int grow_backup(struct mdinfo *sra,
                unsigned long long offset, /* per device */
                unsigned long stripes, /* per device */
                int *sources, unsigned long long *offsets,
@@ -1284,7 +2094,8 @@ int grow_backup(struct mdinfo *sra,
        int odata = disks;
        int rv = 0;
        int i;
-       unsigned long long new_degraded;
+       unsigned long long ll;
+       int new_degraded;
        //printf("offset %llu\n", offset);
        if (level >= 4)
                odata--;
@@ -1292,7 +2103,8 @@ int grow_backup(struct mdinfo *sra,
                odata--;
        sysfs_set_num(sra, NULL, "suspend_hi", (offset + stripes * (chunk/512)) * odata);
        /* Check that array hasn't become degraded, else we might backup the wrong data */
-       sysfs_get_ll(sra, NULL, "degraded", &new_degraded);
+       sysfs_get_ll(sra, NULL, "degraded", &ll);
+       new_degraded = (int)ll;
        if (new_degraded != *degraded) {
                /* check each device to ensure it is still working */
                struct mdinfo *sd;
@@ -1348,16 +2160,21 @@ int grow_backup(struct mdinfo *sra,
                        bsb.sb_csum2 = bsb_csum((char*)&bsb,
                                                ((char*)&bsb.sb_csum2)-((char*)&bsb));
 
-               if (lseek64(destfd[i], destoffsets[i] - 4096, 0) != destoffsets[i] - 4096)
-                       rv = 1;
-               rv = rv ?: write(destfd[i], &bsb, 512);
+               rv = -1;
+               if ((unsigned long long)lseek64(destfd[i], destoffsets[i] - 4096, 0)
+                   != destoffsets[i] - 4096)
+                       break;
+               if (write(destfd[i], &bsb, 512) != 512)
+                       break;
                if (destoffsets[i] > 4096) {
-                       if (lseek64(destfd[i], destoffsets[i]+stripes*chunk*odata, 0) !=
+                       if ((unsigned long long)lseek64(destfd[i], destoffsets[i]+stripes*chunk*odata, 0) !=
                            destoffsets[i]+stripes*chunk*odata)
-                               rv = 1;
-                       rv = rv ?: write(destfd[i], &bsb, 512);
+                               break;
+                       if (write(destfd[i], &bsb, 512) != 512)
+                               break;
                }
                fsync(destfd[i]);
+               rv = 0;
        }
 
        return rv;
@@ -1374,7 +2191,7 @@ int grow_backup(struct mdinfo *sra,
  * every works.
  */
 /* FIXME return value is often ignored */
-int wait_backup(struct mdinfo *sra,
+static int wait_backup(struct mdinfo *sra,
                unsigned long long offset, /* per device */
                unsigned long long blocks, /* per device */
                unsigned long long blocks2, /* per device - hack */
@@ -1392,9 +2209,12 @@ int wait_backup(struct mdinfo *sra,
        if (fd < 0)
                return -1;
        sysfs_set_num(sra, NULL, "sync_max", offset + blocks + blocks2);
-       if (offset == 0)
-               sysfs_set_str(sra, NULL, "sync_action", "reshape");
-       do {
+
+       if (sysfs_fd_get_ll(fd, &completed) < 0) {
+               close(fd);
+               return -1;
+       }
+       while (completed < offset + blocks) {
                char action[20];
                fd_set rfds;
                FD_ZERO(&rfds);
@@ -1408,7 +2228,7 @@ int wait_backup(struct mdinfo *sra,
                                  action, 20) > 0 &&
                    strncmp(action, "reshape", 7) != 0)
                        break;
-       } while (completed < offset + blocks);
+       }
        close(fd);
 
        if (part) {
@@ -1426,10 +2246,12 @@ int wait_backup(struct mdinfo *sra,
                if (memcmp(bsb.magic, "md_backup_data-2", 16) == 0)
                        bsb.sb_csum2 = bsb_csum((char*)&bsb,
                                                ((char*)&bsb.sb_csum2)-((char*)&bsb));
-               if (lseek64(destfd[i], destoffsets[i]-4096, 0) !=
+               if ((unsigned long long)lseek64(destfd[i], destoffsets[i]-4096, 0) !=
                    destoffsets[i]-4096)
-                       rv = 1;
-               rv = rv ?: write(destfd[i], &bsb, 512);
+                       rv = -1;
+               if (rv == 0 && 
+                   write(destfd[i], &bsb, 512) != 512)
+                       rv = -1;
                fsync(destfd[i]);
        }
        return rv;
@@ -1438,13 +2260,13 @@ int wait_backup(struct mdinfo *sra,
 static void fail(char *msg)
 {
        int rv;
-       rv = write(2, msg, strlen(msg));
-       rv |= write(2, "\n", 1);
+       rv = (write(2, msg, strlen(msg)) != (int)strlen(msg));
+       rv |= (write(2, "\n", 1) != 1);
        exit(rv ? 1 : 2);
 }
 
 static char *abuf, *bbuf;
-static int abuflen;
+static unsigned long long abuflen;
 static void validate(int afd, int bfd, unsigned long long offset)
 {
        /* check that the data in the backup against the array.
@@ -1485,12 +2307,12 @@ static void validate(int afd, int bfd, unsigned long long offset)
                }
 
                lseek64(bfd, offset, 0);
-               if (read(bfd, bbuf, len) != len) {
+               if ((unsigned long long)read(bfd, bbuf, len) != len) {
                        //printf("len %llu\n", len);
                        fail("read first backup failed");
                }
                lseek64(afd, __le64_to_cpu(bsb2.arraystart)*512, 0);
-               if (read(afd, abuf, len) != len)
+               if ((unsigned long long)read(afd, abuf, len) != len)
                        fail("read first from array failed");
                if (memcmp(bbuf, abuf, len) != 0) {
                        #if 0
@@ -1518,10 +2340,10 @@ static void validate(int afd, int bfd, unsigned long long offset)
                }
 
                lseek64(bfd, offset+__le64_to_cpu(bsb2.devstart2)*512, 0);
-               if (read(bfd, bbuf, len) != len)
+               if ((unsigned long long)read(bfd, bbuf, len) != len)
                        fail("read second backup failed");
                lseek64(afd, __le64_to_cpu(bsb2.arraystart2)*512, 0);
-               if (read(afd, abuf, len) != len)
+               if ((unsigned long long)read(afd, abuf, len) != len)
                        fail("read second from array failed");
                if (memcmp(bbuf, abuf, len) != 0)
                        fail("data2 compare failed");
@@ -1539,8 +2361,6 @@ static int child_grow(int afd, struct mdinfo *sra, unsigned long stripes,
        if (posix_memalign((void**)&buf, 4096, disks * chunk))
                /* Don't start the 'reshape' */
                return 0;
-       sysfs_set_num(sra, NULL, "suspend_hi", 0);
-       sysfs_set_num(sra, NULL, "suspend_lo", 0);
        grow_backup(sra, 0, stripes,
                    fds, offsets, disks, chunk, level, layout,
                    dests, destfd, destoffsets,
@@ -1570,9 +2390,6 @@ static int child_shrink(int afd, struct mdinfo *sra, unsigned long stripes,
                return 0;
        start = sra->component_size - stripes * (chunk/512);
        sysfs_set_num(sra, NULL, "sync_max", start);
-       sysfs_set_str(sra, NULL, "sync_action", "reshape");
-       sysfs_set_num(sra, NULL, "suspend_lo", 0);
-       sysfs_set_num(sra, NULL, "suspend_hi", 0);
        rv = wait_backup(sra, 0, start - stripes * (chunk/512), stripes * (chunk/512),
                         dests, destfd, destoffsets, 0);
        if (rv < 0)
@@ -1609,9 +2426,6 @@ static int child_same_size(int afd, struct mdinfo *sra, unsigned long stripes,
        if (posix_memalign((void**)&buf, 4096, disks * chunk))
                return 0;
 
-       sysfs_set_num(sra, NULL, "suspend_lo", 0);
-       sysfs_set_num(sra, NULL, "suspend_hi", 0);
-
        sysfs_get_ll(sra, NULL, "sync_speed_min", &speed);
        sysfs_set_num(sra, NULL, "sync_speed_min", 200000);
 
@@ -1719,7 +2533,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                        if (st->ss->load_super(st, fd, NULL))
                                continue;
 
-                       st->ss->getinfo_super(st, &dinfo);
+                       st->ss->getinfo_super(st, &dinfo, NULL);
                        st->ss->free_super(st);
 
                        if (lseek64(fd,
@@ -1763,8 +2577,8 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                 * sometimes they aren't... So allow considerable flexability in matching, and allow
                 * this test to be overridden by an environment variable.
                 */
-               if (info->array.utime > __le64_to_cpu(bsb.mtime) + 2*60*60 ||
-                   info->array.utime < __le64_to_cpu(bsb.mtime) - 10*60) {
+               if (info->array.utime > (int)__le64_to_cpu(bsb.mtime) + 2*60*60 ||
+                   info->array.utime < (int)__le64_to_cpu(bsb.mtime) - 10*60) {
                        if (check_env("MDADM_GROW_ALLOW_OLD")) {
                                fprintf(stderr, Name ": accepting backup with timestamp %lu "
                                        "for array with timestamp %lu\n",
@@ -1820,7 +2634,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                }
                /* There should be a duplicate backup superblock 4k before here */
                if (lseek64(fd, -4096, 1) < 0 ||
-                   read(fd, &bsb2, 4096) != 4096)
+                   read(fd, &bsb2, sizeof(bsb2)) != sizeof(bsb2))
                        goto second_fail; /* Cannot find leading superblock */
                if (bsb.magic[15] == '1')
                        bsbsize = offsetof(struct mdp_backup_super, pad1);
@@ -1837,7 +2651,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                        if (st->ss->load_super(st, fdlist[j], NULL))
                                /* FIXME should be this be an error */
                                continue;
-                       st->ss->getinfo_super(st, &dinfo);
+                       st->ss->getinfo_super(st, &dinfo, NULL);
                        st->ss->free_super(st);
                        offsets[j] = dinfo.data_offset * 512;
                }
@@ -1899,7 +2713,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                        if (fdlist[j] < 0) continue;
                        if (st->ss->load_super(st, fdlist[j], NULL))
                                continue;
-                       st->ss->getinfo_super(st, &dinfo);
+                       st->ss->getinfo_super(st, &dinfo, NULL);
                        dinfo.reshape_progress = info->reshape_progress;
                        st->ss->update_super(st, &dinfo,
                                             "_reshape_progress",
@@ -1973,15 +2787,9 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
        int d;
        struct mdinfo *sra, *sd;
        int rv;
-       int cache;
+       unsigned long cache;
        int done = 0;
 
-       sra = sysfs_read(-1, devname2devnum(info->sys_name),
-                        GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
-                        GET_CACHE);
-       if (!sra)
-               return 1;
-
        err = sysfs_set_str(info, NULL, "array_state", "readonly");
        if (err)
                return err;
@@ -1989,7 +2797,13 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
        /* make sure reshape doesn't progress until we are ready */
        sysfs_set_str(info, NULL, "sync_max", "0");
        sysfs_set_str(info, NULL, "array_state", "active"); /* FIXME or clean */
-       
+
+       sra = sysfs_read(-1, devname2devnum(info->sys_name),
+                        GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
+                        GET_CACHE);
+       if (!sra)
+               return 1;
+
        /* ndisks is not growing, so raid_disks is old and +delta is new */
        odisks = info->array.raid_disks;
        ndisks = odisks + info->delta_disks;
@@ -2039,6 +2853,11 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
        bsb.devstart2 = blocks;
 
        backup_fd = open(backup_file, O_RDWR|O_CREAT, S_IRUSR | S_IWUSR);
+       if (backup_fd < 0) {
+               fprintf(stderr, Name ": Cannot open backup file %s\n",
+                       backup_file ?: "- no backup-file given");
+               return 1;
+       }
        backup_list[0] = backup_fd;
        backup_offsets[0] = 8 * 512;
        fds = malloc(odisks * sizeof(fds[0]));