git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Grow.c
FIX: Array after takeover has to be frozen
[thirdparty/mdadm.git] / Grow.c
diff --git a/Grow.c b/Grow.c
index 43ef42190c085d3f74c891673bbde594a11a823d..958febf40cb34b2eb34e040426a8f2e029fe032d 100644 (file)
--- a/Grow.c
+++ b/Grow.c
@@ -453,20 +453,6 @@ static __u32 bsb_csum(char *buf, int len)
        return __cpu_to_le32(csum);
 }
 
-static int child_grow(int afd, struct mdinfo *sra, unsigned long blocks,
-                     int *fds, unsigned long long *offsets,
-                     int disks, int chunk, int level, int layout, int data,
-                     int dests, int *destfd, unsigned long long *destoffsets);
-static int child_shrink(int afd, struct mdinfo *sra, unsigned long blocks,
-                       int *fds, unsigned long long *offsets,
-                       int disks, int chunk, int level, int layout, int data,
-                       int dests, int *destfd, unsigned long long *destoffsets);
-static int child_same_size(int afd, struct mdinfo *sra, unsigned long blocks,
-                          int *fds, unsigned long long *offsets,
-                          unsigned long long start,
-                          int disks, int chunk, int level, int layout, int data,
-                          int dests, int *destfd, unsigned long long *destoffsets);
-
 static int check_idle(struct supertype *st)
 {
        /* Check that all member arrays for this container, or the
@@ -544,12 +530,8 @@ static int freeze(struct supertype *st)
        }
 }
 
-static void unfreeze(struct supertype *st, int frozen)
+static void unfreeze(struct supertype *st)
 {
-       /* If 'frozen' is 1, unfreeze the array */
-       if (frozen <= 0)
-               return;
-
        if (st->ss->external)
                return unfreeze_container(st);
        else {
@@ -648,9 +630,13 @@ static int subarray_set_num(char *container, struct mdinfo *sra, char *name, int
 int start_reshape(struct mdinfo *sra)
 {
        int err;
+       sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
        err = sysfs_set_num(sra, NULL, "suspend_hi", 0);
        err = err ?: sysfs_set_num(sra, NULL, "suspend_lo", 0);
-       err = err ?: sysfs_set_num(sra, NULL, "sync_min", 0);
+       /* Setting sync_min can fail if the recovery is already 'running',
+        * which can happen when restarting an array which is reshaping.
+        * So don't worry about errors here */
+       sysfs_set_num(sra, NULL, "sync_min", 0);
        err = err ?: sysfs_set_num(sra, NULL, "sync_max", 0);
        err = err ?: sysfs_set_str(sra, NULL, "sync_action", "reshape");
 
@@ -667,15 +653,20 @@ void abort_reshape(struct mdinfo *sra)
        sysfs_set_str(sra, NULL, "sync_max", "max");
 }
 
-int remove_disks_on_raid10_to_raid0_takeover(struct supertype *st,
-                                            struct mdinfo *sra,
-                                            int layout)
+int remove_disks_for_takeover(struct supertype *st,
+                             struct mdinfo *sra,
+                             int layout)
 {
        int nr_of_copies;
        struct mdinfo *remaining;
        int slot;
 
-       nr_of_copies = layout & 0xff;
+       if (sra->array.level == 10)
+               nr_of_copies = layout & 0xff;
+       else if (sra->array.level == 1)
+               nr_of_copies = sra->array.raid_disks;
+       else
+               return 1;
 
        remaining = sra->devs;
        sra->devs = NULL;
@@ -806,7 +797,8 @@ int reshape_open_backup_file(char *backup_file,
                             char *devname,
                             long blocks,
                             int *fdlist,
-                            unsigned long long *offsets)
+                            unsigned long long *offsets,
+                            int restart)
 {
        /* Return 1 on success, 0 on any form of failure */
        /* need to check backup file is large enough */
@@ -815,7 +807,7 @@ int reshape_open_backup_file(char *backup_file,
        unsigned int dev;
        int i;
 
-       *fdlist = open(backup_file, O_RDWR|O_CREAT|O_EXCL,
+       *fdlist = open(backup_file, O_RDWR|O_CREAT|(restart ? O_TRUNC : O_EXCL),
                       S_IRUSR | S_IWUSR);
        *offsets = 8 * 512;
        if (*fdlist < 0) {
@@ -880,30 +872,6 @@ unsigned long compute_backup_blocks(int nchunk, int ochunk,
        return blocks;
 }
 
-/* 'struct reshape' records the intermediate states
- * a general reshape.
- * The starting geometry is converted to the 'before' geometry
- * by at most an atomic level change. They could be the same.
- * Similarly the 'after' geometry is converted to the final
- * geometry by at most a level change.
- * Note that 'before' and 'after' must have the same level.
- * 'blocks' is the minimum number of sectors for a reshape unit.
- * This will be a multiple of the stripe size in each of the
- * 'before' and 'after' geometries.
- * If 'blocks' is 0, no restriping is necessary.
- */
-struct reshape {
-       int level;
-       int parity; /* number of parity blocks/devices */
-       struct {
-               int layout;
-               int data_disks;
-       } before, after;
-       unsigned long long blocks;
-       unsigned long long stripes; /* number of old stripes that comprise 'blocks'*/
-       unsigned long long new_size; /* New size of array in sectors */
-};
-
 char *analyse_change(struct mdinfo *info, struct reshape *re)
 {
        /* Based on the current array state in info->array and
@@ -950,29 +918,35 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
        switch (info->array.level) {
        case 1:
                /* RAID1 can convert to RAID1 with different disks, or
-                * raid5 with 2 disks
+                * raid5 with 2 disks, or
+                * raid0 with 1 disk
                 */
+               if (info->new_level == 0) {
+                       re->level = 0;
+                       re->before.data_disks = 1;
+                       re->after.data_disks = 1;
+                       re->before.layout = 0;
+                       re->backup_blocks = 0;
+                       re->parity = 0;
+                       return NULL;
+               }
                if (info->new_level == 1) {
                        if (info->delta_disks == UnSet)
                                /* Don't know what to do */
                                return "no change requested for Growing RAID1";
                        re->level = 1;
-                       re->before.data_disks = (info->array.raid_disks +
-                                                info->delta_disks);
-                       re->before.layout = 0;
-                       re->blocks = 0;
+                       re->backup_blocks = 0;
                        re->parity = 0;
                        return NULL;
                }
                if (info->array.raid_disks == 2 &&
-                   info->array.raid_disks == 5) {
-                       /* simple in-place conversion */
+                   info->new_level == 5) {
                        re->level = 5;
-                       re->parity = 1;
                        re->before.data_disks = 1;
+                       re->after.data_disks = 1;
                        re->before.layout = ALGORITHM_LEFT_SYMMETRIC;
-                       re->blocks = 0;
-                       return NULL;
+                       info->array.chunk_size = 65536;
+                       break;
                }
                /* Could do some multi-stage conversions, but leave that to
                 * later.
@@ -993,7 +967,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                        return "RAID10 can only be changed to RAID0";
                new_disks = (info->array.raid_disks
                             / (info->array.layout & 0xff));
-               if (info->delta_disks != UnSet) {
+               if (info->delta_disks == UnSet) {
                        info->delta_disks = (new_disks
                                             - info->array.raid_disks);
                }
@@ -1007,8 +981,9 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                re->level = 0;
                re->parity = 0;
                re->before.data_disks = new_disks;
+               re->after.data_disks = re->before.data_disks;
                re->before.layout = 0;
-               re->blocks = 0;
+               re->backup_blocks = 0;
                return NULL;
 
        case 0:
@@ -1044,8 +1019,9 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                        re->parity = 0;
                        re->before.data_disks = (info->array.raid_disks +
                                                 info->delta_disks);
+                       re->after.data_disks = re->before.data_disks;
                        re->before.layout = info->new_layout;
-                       re->blocks = 0;
+                       re->backup_blocks = 0;
                        return NULL;
                }
 
@@ -1117,8 +1093,6 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                        if (info->array.raid_disks != 2)
                                return "Can only convert a 2-device array to RAID1";
                        re->level = 1;
-                       re->before.data_disks = 2;
-                       re->before.layout = 0;
                        break;
                default:
                        return "Impossible level change requested";
@@ -1203,10 +1177,9 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                                re->after.layout = info->new_layout;
                        break;
                case 6:
-                       if (info->new_layout == UnSet) {
-                               re->after.layout = re->before.layout;
-                               break;
-                       }
+                       if (info->new_layout == UnSet)
+                               info->new_layout = re->before.layout;
+
                        /* after.layout needs to be raid6 version of new_layout */
                        if (info->new_layout == ALGORITHM_PARITY_N)
                                re->after.layout = ALGORITHM_PARITY_N;
@@ -1234,7 +1207,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                        re->after.data_disks = (info->array.raid_disks +
                                                info->delta_disks) - 2;
                if (info->new_layout == UnSet)
-                       re->after.layout = re->before.layout;
+                       re->after.layout = info->array.layout;
                else
                        re->after.layout = info->new_layout;
                break;
@@ -1256,12 +1229,12 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
            re->after.layout == re->before.layout &&
            info->new_chunk == info->array.chunk_size) {
                /* Nothing to change */
-               re->blocks = 0;
+               re->backup_blocks = 0;
                return NULL;
        }
        if (re->after.data_disks == 1 && re->before.data_disks == 1) {
-               /* chunks can layout changes make no difference */
-               re->blocks = 0;
+               /* chunk and layout changes make no difference */
+               re->backup_blocks = 0;
                return NULL;
        }
 
@@ -1273,7 +1246,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
            get_linux_version() < 2006030)
                return "reshape to fewer devices is not supported before 2.6.32 - sorry.";
 
-       re->blocks = compute_backup_blocks(
+       re->backup_blocks = compute_backup_blocks(
                info->new_chunk, info->array.chunk_size,
                re->after.data_disks,
                re->before.data_disks);
@@ -1284,7 +1257,8 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
 
 static int reshape_array(char *container, int fd, char *devname,
                         struct supertype *st, struct mdinfo *info,
-                        int force, char *backup_file, int quiet, int forked);
+                        int force, char *backup_file, int quiet, int forked,
+                        int restart);
 static int reshape_container(char *container, int cfd, char *devname,
                             struct supertype *st, 
                             struct mdinfo *info,
@@ -1389,10 +1363,8 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                fmt_devname(container_buf, container_dev);
                container = container_buf;
 
-               if (subarray)
-                       rv = st->ss->load_container(st, cfd, NULL);
-               else
-                       rv = st->ss->load_super(st, cfd, NULL);
+               rv = st->ss->load_container(st, cfd, NULL);
+
                if (rv) {
                        fprintf(stderr, Name ": Cannot read superblock for %s\n",
                                devname);
@@ -1417,8 +1389,9 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                return 1;
        }
 
-       sra = sysfs_read(fd, 0, GET_LEVEL | GET_DISKS | GET_DEVS | GET_STATE);
-       if (sra) {
+       sra = sysfs_read(fd, 0, GET_LEVEL | GET_DISKS | GET_DEVS
+                        | GET_STATE | GET_VERSION);
+       if (sra) {
                if (st->ss->external && subarray == NULL) {
                        array.level = LEVEL_CONTAINER;
                        sra->array.level = LEVEL_CONTAINER;
@@ -1488,15 +1461,17 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        size = array.size;
        }
 
-       /* ========= check for Raid10 -> Raid0 conversion ===============
+       /* ========= check for Raid10/Raid1 -> Raid0 conversion ===============
         * current implementation assumes that following conditions must be met:
-        * - far_copies == 1
-        * - near_copies == 2
+        * - RAID10:
+        *      - far_copies == 1
+        *      - near_copies == 2
         */
-       if (level == 0 && array.level == 10 && sra &&
-           array.layout == ((1 << 8) + 2) && !(array.raid_disks & 1)) {
+       if ((level == 0 && array.level == 10 && sra &&
+           array.layout == ((1 << 8) + 2) && !(array.raid_disks & 1)) ||
+           (level == 0 && array.level == 1 && sra)) {
                int err;
-               err = remove_disks_on_raid10_to_raid0_takeover(st, sra, array.layout);
+               err = remove_disks_for_takeover(st, sra, array.layout);
                if (err) {
                        dprintf(Name": Array cannot be reshaped\n");
                        if (cfd > -1)
@@ -1504,9 +1479,14 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        rv = 1;
                        goto release;
                }
+               /* FIXME this is added with no justification - why is it here */
+               ping_monitor(container);
        }
 
        info.array = array;
+       sysfs_init(&info, fd, NoMdDev);
+       strcpy(info.text_version, sra->text_version);
+       info.component_size = size*2;
        info.new_level = level;
        info.new_chunk = chunksize * 1024;
        if (raid_disks)
@@ -1587,6 +1567,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                 */
                rv = reshape_container(container, fd, devname, st, &info,
                                       force, backup_file, quiet);
+               frozen = 0;
        } else {
                /* Impose these changes on a single array.  First
                 * check that the metadata is OK with the change. */
@@ -1600,17 +1581,20 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                }
                sync_metadata(st);
                rv = reshape_array(container, fd, devname, st, &info, force,
-                                  backup_file, quiet, 0);
+                                  backup_file, quiet, 0, 0);
+               frozen = 0;
        }
 release:
-       unfreeze(st, frozen);
+       if (frozen > 0)
+               unfreeze(st);
        return rv;
 }
 
 static int reshape_array(char *container, int fd, char *devname,
                         struct supertype *st, struct mdinfo *info,
                         int force,
-                        char *backup_file, int quiet, int forked)
+                        char *backup_file, int quiet, int forked,
+                        int restart)
 {
        struct reshape reshape;
        int spares_needed;
@@ -1620,30 +1604,47 @@ static int reshape_array(char *container, int fd, char *devname,
 
        struct mdu_array_info_s array;
        char *c;
-       int rv = 0;
 
        int *fdlist;
        unsigned long long *offsets;
        int d;
        int nrdisks;
        int err;
-       int frozen;
-       unsigned long blocks, stripes;
+       unsigned long blocks;
        unsigned long cache;
        unsigned long long array_size;
        int done;
-       struct mdinfo *sra, *sd;
+       struct mdinfo *sra = NULL;
 
        msg = analyse_change(info, &reshape);
        if (msg) {
                fprintf(stderr, Name ": %s\n", msg);
-               return 1;
+               goto release;
        }
+       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
+               dprintf("Canot get array information.\n");
+               goto release;
+       }
+
+       if (restart) {
+               /* reshape already started. just skip to monitoring the reshape */
+               if (reshape.backup_blocks == 0)
+                       return 0;
+               goto started;
+       }
+       /* The container is frozen but the array may not be.
+        * So freeze the array so spares don't get put to the wrong use
+        * FIXME there should probably be a cleaner separation between
+        * freeze_array and freeze_container.
+        */
+       sysfs_freeze_array(info);
        spares_needed = max(reshape.before.data_disks,
                            reshape.after.data_disks)
                + reshape.parity - array.raid_disks;
 
-       if (!force && spares_needed < info->array.spare_disks) {
+       if (!force &&
+           info->new_level > 1 &&
+           spares_needed > info->array.spare_disks) {
                fprintf(stderr,
                        Name ": Need %d spare%s to avoid degraded array,"
                        " and only have %d.\n"
@@ -1651,14 +1652,14 @@ static int reshape_array(char *container, int fd, char *devname,
                        spares_needed,
                        spares_needed == 1 ? "" : "s", 
                        info->array.spare_disks);
-               return 1;
+               goto release;
        }
 
        if (reshape.level != info->array.level) {
                char *c = map_num(pers, reshape.level);
                int err;
                if (c == NULL)
-                       return 1; /* This should not be possible */
+                       goto release;
 
                err = sysfs_set_str(info, NULL, "level", c);
                if (err) {
@@ -1669,20 +1670,20 @@ static int reshape_array(char *container, int fd, char *devname,
                            (info->array.state & (1<<MD_SB_BITMAP_PRESENT)))
                                fprintf(stderr, "       Bitmap must be removed"
                                        " before level can be changed\n");
-                       return 1;
+                       goto release;
                }
                if (!quiet)
-                       fprintf(stderr, Name " level of %s changed to %s\n",
+                       fprintf(stderr, Name ": level of %s changed to %s\n",
                                devname, c);    
                orig_level = info->array.level;
-       }
 
-       if (reshape.level > 0 && st->ss->external &&
-           !mdmon_running(st->container_dev)) {
-               start_mdmon(st->container_dev);
-               ping_monitor(container);
+               if (reshape.level > 0 && st->ss->external) {
+                       /* make sure mdmon is aware of the new level */
+                       if (!mdmon_running(st->container_dev))
+                               start_mdmon(st->container_dev);
+                       ping_monitor(container);
+               }
        }
-
        /* ->reshape_super might have chosen some spares from the
         * container that it wants to be part of the new array.
         * We can collect them with ->container_content and give
@@ -1694,7 +1695,8 @@ static int reshape_array(char *container, int fd, char *devname,
                        st->ss->container_content(st, subarray);
                struct mdinfo *d;
 
-               if (info2)
+               if (info2) {
+                       sysfs_init(info2, fd, st->devnum);
                        for (d = info2->devs; d; d = d->next) {
                                if (d->disk.state == 0 &&
                                    d->disk.raid_disk >= 0) {
@@ -1704,10 +1706,11 @@ static int reshape_array(char *container, int fd, char *devname,
                                        add_disk(fd, st, info2, d);
                                }
                        }
-               sysfs_free(info2);
+                       sysfs_free(info2);
+               }
        }
 
-       if (reshape.blocks == 0) {
+       if (reshape.backup_blocks == 0) {
                /* No restriping needed, but we might need to impose
                 * some more changes: layout, raid_disks, chunk_size
                 */
@@ -1716,7 +1719,7 @@ static int reshape_array(char *container, int fd, char *devname,
                        info->array.layout = info->new_layout;
                        if (ioctl(fd, SET_ARRAY_INFO, &info->array) != 0) {
                                fprintf(stderr, Name ": failed to set new layout\n");
-                               rv = 1;
+                               goto release;
                        } else if (!quiet)
                                printf("layout for %s set to %d\n",
                                       devname, info->array.layout);
@@ -1726,7 +1729,7 @@ static int reshape_array(char *container, int fd, char *devname,
                        info->array.raid_disks += info->delta_disks;
                        if (ioctl(fd, SET_ARRAY_INFO, &info->array) != 0) {
                                fprintf(stderr, Name ": failed to set raid disks\n");
-                               rv = 1;
+                               goto release;
                        } else if (!quiet)
                                printf("raid_disks for %s set to %d\n",
                                       devname, info->array.raid_disks);
@@ -1736,13 +1739,13 @@ static int reshape_array(char *container, int fd, char *devname,
                        if (sysfs_set_num(info, NULL,
                                          "chunk_size", info->new_chunk) != 0) {
                                fprintf(stderr, Name ": failed to set chunk size\n");
-                               rv = 1;
+                               goto release;
                        } else if (!quiet)
                                printf("chunk size for %s set to %d\n",
                                       devname, info->array.chunk_size);
                }
-
-               return rv;
+               unfreeze(st);
+               return 0;
        }
 
        /*
@@ -1778,7 +1781,7 @@ static int reshape_array(char *container, int fd, char *devname,
         *   -  request the shape change.
         *   -  fork to handle backup etc.
         */
-
+started:
        /* Check that we can hold all the data */
        get_dev_size(fd, NULL, &array_size);
        if (reshape.new_size < (array_size/512)) {
@@ -1787,25 +1790,22 @@ static int reshape_array(char *container, int fd, char *devname,
                        "       use --grow --array-size first to truncate array.\n"
                        "       e.g. mdadm --grow %s --array-size %llu\n",
                        devname, reshape.new_size/2);
-               rv = 1;
                goto release;
        }
 
        sra = sysfs_read(fd, 0,
-                        GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
+                        GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK|
                         GET_CACHE);
-
        if (!sra) {
                fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n",
                        devname);
-               rv = 1;
                goto release;
        }
 
        /* Decide how many blocks (sectors) for a reshape
         * unit.  The number we have so far is just a minimum
         */
-       blocks = reshape.blocks;
+       blocks = reshape.backup_blocks;
        if (reshape.before.data_disks == 
            reshape.after.data_disks) {
                /* Make 'blocks' bigger for better throughput, but
@@ -1823,7 +1823,6 @@ static int reshape_array(char *container, int fd, char *devname,
                fprintf(stderr, Name ": %s: Something wrong"
                        " - reshape aborted\n",
                        devname);
-               rv = 1;
                goto release;
        }
 
@@ -1834,7 +1833,6 @@ static int reshape_array(char *container, int fd, char *devname,
        offsets = malloc((1+nrdisks) * sizeof(offsets[0]));
        if (!fdlist || !offsets) {
                fprintf(stderr, Name ": malloc failed: grow aborted\n");
-               rv = 1;
                goto release;
        }
 
@@ -1842,28 +1840,24 @@ static int reshape_array(char *container, int fd, char *devname,
                                   nrdisks, blocks, backup_file,
                                   fdlist, offsets);
        if (d < 0) {
-               rv = 1;
                goto release;
        }
        if (backup_file == NULL) {
-               if (reshape.after.data_disks <= reshape.before.data_disks) {
+               if (reshape.after.data_disks <= reshape.before.data_disks) {
                        fprintf(stderr,
                                Name ": %s: Cannot grow - need backup-file\n", 
                                devname);
-                       rv = 1;
                        goto release;
                } else if (sra->array.spare_disks == 0) {
                        fprintf(stderr, Name ": %s: Cannot grow - need a spare or "
                                "backup-file to backup critical section\n",
                                devname);
-                       rv = 1;
                        goto release;
                }
        } else {
                if (!reshape_open_backup_file(backup_file, fd, devname,
                                              (signed)blocks,
-                                             fdlist+d, offsets+d)) {
-                       rv = 1;
+                                             fdlist+d, offsets+d, restart)) {
                        goto release;
                }
                d++;
@@ -1893,12 +1887,27 @@ static int reshape_array(char *container, int fd, char *devname,
         */
        sync_metadata(st);
 
+       sra->new_chunk = info->new_chunk;
+
+       if (info->reshape_active)
+               sra->reshape_progress = info->reshape_progress;
+       else {
+               sra->reshape_progress = 0;
+               if (reshape.after.data_disks < reshape.before.data_disks)
+                       /* start from the end of the new array */
+                       sra->reshape_progress = (sra->component_size
+                                                * reshape.after.data_disks);
+       }
+
        if (info->array.chunk_size == info->new_chunk &&
-           reshape.before.layout == reshape.after.layout) {
+           reshape.before.layout == reshape.after.layout &&
+           st->ss->external == 0) {
+               /* use SET_ARRAY_INFO but only if reshape hasn't started */
                array.raid_disks = reshape.after.data_disks + reshape.parity;
-               if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+               if (!info->reshape_active &&
+                   ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
                        int err = errno;
-                       rv = 1;
+
                        fprintf(stderr,
                                Name ": Cannot set device shape for %s: %s\n",
                                devname, strerror(errno));
@@ -1913,22 +1922,22 @@ static int reshape_array(char *container, int fd, char *devname,
                }
        } else {
                /* set them all just in case some old 'new_*' value
-                * persists from some earlier problem
+                * persists from some earlier problem.
+                * We even set them when restarting in the middle.  They will
+                * already be set in that case so this will be a no-op,
+                * but it is hard to tell the difference.
                 */
-               int err = err; /* only used if rv==1, and always set if
-                               * rv==1, so initialisation not needed,
-                               * despite gcc warning
-                               */
+               int err = 0;
                if (sysfs_set_num(sra, NULL, "chunk_size", info->new_chunk) < 0)
-                       rv = 1, err = errno;
-               if (!rv && sysfs_set_num(sra, NULL, "layout", 
+                       err = errno;
+               if (!err && sysfs_set_num(sra, NULL, "layout", 
                                         reshape.after.layout) < 0)
-                       rv = 1, err = errno;
-               if (!rv && subarray_set_num(container, sra, "raid_disks",
+                       err = errno;
+               if (!err && subarray_set_num(container, sra, "raid_disks",
                                            reshape.after.data_disks +
                                            reshape.parity) < 0)
-                       rv = 1, err = errno;
-               if (rv) {
+                       err = errno;
+               if (err) {
                        fprintf(stderr, Name ": Cannot set device shape for %s\n",
                                devname);
 
@@ -1942,140 +1951,138 @@ static int reshape_array(char *container, int fd, char *devname,
        }
 
        start_reshape(sra);
-       if (st->ss->external) {
-               /* metadata handler takes it from here */
-               ping_manager(container);
-               st->ss->manage_reshape(st, backup_file);
-               frozen = 0;
-               goto release;
-       }
+       if (restart)
+               sysfs_set_str(sra, NULL, "array_state", "active");
 
-       /* set up the backup-super-block.  This requires the
-        * uuid from the array.
+       /* Now we just need to kick off the reshape and watch, while
+        * handling backups of the data...
+        * This is all done by a forked background process.
         */
-       /* Find a superblock */
-       for (sd = sra->devs; sd; sd = sd->next) {
-               char *dn;
-               int devfd;
-               int ok;
-               if (sd->disk.state & (1<<MD_DISK_FAULTY))
-                       continue;
-               dn = map_dev(sd->disk.major, sd->disk.minor, 1);
-               devfd = dev_open(dn, O_RDONLY);
-               if (devfd < 0)
-                       continue;
-               ok = st->ss->load_super(st, devfd, NULL);
-               close(devfd);
-               if (ok >= 0)
-                       break;
-       }
-       if (!sd) {
-               fprintf(stderr, Name ": %s: Cannot find a superblock\n",
-                       devname);
-               rv = 1;
+       switch(forked ? 0 : fork()) {
+       case -1:
+               fprintf(stderr, Name ": Cannot run child to monitor reshape: %s\n",
+                       strerror(errno));
                abort_reshape(sra);
                goto release;
+       default:
+               return 0;
+       case 0:
+               break;
        }
 
-       memset(&bsb, 0, 512);
-       memcpy(bsb.magic, "md_backup_data-1", 16);
-       st->ss->uuid_from_super(st, (int*)&bsb.set_uuid);
-       bsb.mtime = __cpu_to_le64(time(0));
-       bsb.devstart2 = blocks;
+       close(fd);
+       if (check_env("MDADM_GROW_VERIFY"))
+               fd = open(devname, O_RDONLY | O_DIRECT);
+       else
+               fd = -1;
+       mlockall(MCL_FUTURE);
 
-       stripes = reshape.blocks / (info->array.chunk_size/512) /
-               reshape.before.data_disks;
+       odisks = reshape.before.data_disks + reshape.parity;
 
-       /* Now we just need to kick off the reshape and watch, while
-        * handling backups of the data...
-        * This is all done by a forked background process.
+       if (st->ss->external) {
+               /* metadata handler takes it from here */
+               done = st->ss->manage_reshape(
+                       fd, sra, &reshape, st, blocks,
+                       fdlist, offsets,
+                       d - odisks, fdlist+odisks,
+                       offsets+odisks);
+       } else
+               done = child_monitor(
+                       fd, sra, &reshape, st, blocks,
+                       fdlist, offsets,
+                       d - odisks, fdlist+odisks,
+                       offsets+odisks);
+
+       if (backup_file && done)
+               unlink(backup_file);
+       if (!done) {
+               abort_reshape(sra);
+               goto out;
+       }
+
+       if (!st->ss->external &&
+           !(reshape.before.data_disks != reshape.after.data_disks
+             && info->custom_array_size) &&
+           info->new_level == reshape.level &&
+           !forked) {
+               /* no need to wait for the reshape to finish as
+                * there is nothing more to do.
+                */
+               exit(0);
+       }
+       wait_reshape(sra);
+
+       if (st->ss->external) {
+               /* Re-load the metadata as much could have changed */
+               int cfd = open_dev(st->container_dev);
+               if (cfd >= 0) {
+                       ping_monitor(container);
+                       st->ss->free_super(st);
+                       st->ss->load_container(st, cfd, container);
+                       close(cfd);
+               }
+       }
+
+       /* set new array size if required, i.e. when custom_array_size
+        * is used by this metadata.
         */
-       switch(forked ? 0 : fork()) {
-       case 0:
-               close(fd);
-               if (check_env("MDADM_GROW_VERIFY"))
-                       fd = open(devname, O_RDONLY | O_DIRECT);
-               else
-                       fd = -1;
-               mlockall(MCL_FUTURE);
-
-               odisks = reshape.before.data_disks + reshape.parity;
-
-               if (reshape.before.data_disks < 
-                   reshape.after.data_disks)
-                       done = child_grow(fd, sra, stripes,
-                                         fdlist, offsets,
-                                         odisks,
-                                         info->array.chunk_size,
-                                         reshape.level,
-                                         reshape.before.layout, 
-                                         reshape.before.data_disks,
-                                         d - odisks, fdlist+odisks, offsets+odisks);
-               else if (reshape.before.data_disks >
-                        reshape.after.data_disks)
-                       done = child_shrink(fd, sra, stripes,
-                                           fdlist, offsets,
-                                           odisks, info->array.chunk_size, reshape.level,      
-                                           reshape.before.layout,
-                                           reshape.before.data_disks,
-                                           d - odisks, fdlist+odisks, offsets+odisks);
-               else
-                       done = child_same_size(fd, sra, stripes,
-                                              fdlist, offsets,
-                                              0,
-                                              odisks, info->array.chunk_size, reshape.level,
-                                              reshape.before.layout,
-                                              reshape.before.data_disks,
-                                              d - odisks, fdlist+odisks, offsets+odisks);
-               if (backup_file && done)
-                       unlink(backup_file);
-               if (!done)
-                       abort_reshape(sra);
-               else if (info->new_level != info->array.level) {
-                       /* We need to wait for the reshape to finish
-                        * (which will have happened unless odata < ndata)
-                        * and then set the level
-                        */
+       if (reshape.before.data_disks !=
+           reshape.after.data_disks &&
+           info->custom_array_size) {
+               struct mdinfo *info2;
+               char *subarray = strchr(info->text_version+1, '/')+1;
 
-                       c = map_num(pers, info->new_level);
-                       if (c == NULL) {
-                               if (forked)
-                                       return 1;
-                               exit(0);/* not possible */
+               info2 = st->ss->container_content(st, subarray);
+               if (info2) {
+                       unsigned long long current_size = 0;
+                       unsigned long long new_size =
+                               info2->custom_array_size/2;
+
+                       if (sysfs_get_ll(sra,
+                                        NULL,
+                                        "array_size",
+                                        &current_size) == 0 &&
+                           new_size > current_size) {
+                               if (sysfs_set_num(sra, NULL,
+                                                 "array_size", new_size)
+                                   < 0)
+                                       dprintf("Error: Cannot"
+                                               " set array size");
+                               else
+                                       dprintf("Array size "
+                                               "changed");
+                               dprintf(" from %llu to %llu.\n",
+                                       current_size, new_size);
                        }
+                       sysfs_free(info2);
+               }
+       }
 
-                       if (reshape.before.data_disks < 
-                           reshape.after.data_disks)
-                               wait_reshape(sra);
+       if (info->new_level != reshape.level) {
+
+               c = map_num(pers, info->new_level);
+               if (c) {
                        err = sysfs_set_str(sra, NULL, "level", c);
                        if (err)
-                               fprintf(stderr, Name ": %s: could not set level to %s\n",
-                                       devname, c);
+                               fprintf(stderr, Name\
+                                       ": %s: could not set level "
+                                       "to %s\n", devname, c);
                }
-               if (forked)
-                       return 0;
-               exit(0);
-       case -1:
-               fprintf(stderr, Name ": Cannot run child to monitor reshape: %s\n",
-                       strerror(errno));
-               rv = 1;
-               abort_reshape(sra);
-               break;
-       default:
-               /* The child will take care of unfreezing the array */
-               frozen = 0;
-               break;
        }
+out:
+       if (forked)
+               return 0;
+       exit(0);
 
-
- release:
-       if (rv && orig_level != UnSet && sra) {
+release:
+       if (orig_level != UnSet && sra) {
                c = map_num(pers, orig_level);
                if (c && sysfs_set_str(sra, NULL, "level", c) == 0)
                        fprintf(stderr, Name ": aborting level change\n");
        }
-       unfreeze(st, frozen);
-       return rv;
+       if (!forked)
+               unfreeze(st);
+       return 1;
 }
 
 int reshape_container(char *container, int cfd, char *devname,
@@ -2085,8 +2092,12 @@ int reshape_container(char *container, int cfd, char *devname,
                      char *backup_file,
                      int quiet)
 {
-       struct mdinfo *cc;
-       if (reshape_super(st, info->component_size, info->new_level,
+       struct mdinfo *cc = NULL;
+
+       /* component_size is not meaningful for a container,
+        * so pass '-1' meaning 'no change'
+        */
+       if (reshape_super(st, -1, info->new_level,
                          info->new_layout, info->new_chunk,
                          info->array.raid_disks + info->delta_disks,
                          backup_file, devname, quiet))
@@ -2094,10 +2105,9 @@ int reshape_container(char *container, int cfd, char *devname,
 
        sync_metadata(st);
 
-       cc = st->ss->container_content(st, NULL);
-
-       if (!cc)
-               return 1;
+       /* ping monitor to be sure that update is on disk
+        */
+       ping_monitor(container);
 
        switch (fork()) {
        case -1: /* error */
@@ -2110,36 +2120,59 @@ int reshape_container(char *container, int cfd, char *devname,
                break;
        }
 
-       /* For each member array, we need to perform the reshape */
-       for (; cc; cc = cc->next) {
+       while(1) {
+               /* For each member array with reshape_active,
+                * we need to perform the reshape.
+                * We pick the first array that needs reshaping and
+                * reshape it.  reshape_array() will re-read the metadata
+                * so the next time through a different array should be
+                * ready for reshape.
+                */
+               struct mdinfo *content;
                int rv;
                int fd;
                struct mdstat_ent *mdstat;
-               char *subarray = strchr(cc->text_version+1, '/')+1;
                char *adev;
 
-               if (!cc->reshape_active)
-                       continue;
+               sysfs_free(cc);
 
-               mdstat = mdstat_by_subdev(subarray, devname2devnum(container));
+               cc = st->ss->container_content(st, NULL);
 
-               if (!mdstat)
-                       continue;
-               fd = open_dev_excl(mdstat->devnum);
+               for (content = cc; content ; content = content->next) {
+                       char *subarray;
+                       if (!content->reshape_active)
+                               continue;
+
+                       subarray = strchr(content->text_version+1, '/')+1;
+                       mdstat = mdstat_by_subdev(subarray,
+                                                 devname2devnum(container));
+                       if (!mdstat)
+                               continue;
+                       break;
+               }
+               if (!content)
+                       break;
+
+               fd = open_dev(mdstat->devnum);
                if (fd < 0)
                        break;
                adev = map_dev(dev2major(mdstat->devnum),
                               dev2minor(mdstat->devnum),
                               0);
                if (!adev)
-                       adev = cc->text_version;
+                       adev = content->text_version;
 
-               rv = reshape_array(container, fd, adev, st, cc, force,
-                                  backup_file, quiet, 1);
+               sysfs_init(content, fd, mdstat->devnum);
+
+               rv = reshape_array(container, fd, adev, st,
+                                  content, force,
+                                  backup_file, quiet, 1, 0);
                close(fd);
                if (rv)
                        break;
        }
+       unfreeze(st);
+       sysfs_free(cc);
        exit(0);
 }
 
@@ -2167,10 +2200,314 @@ int reshape_container(char *container, int cfd, char *devname,
  * 
  */
 
+int progress_reshape(struct mdinfo *info, struct reshape *reshape,
+                    unsigned long long backup_point,
+                    unsigned long long wait_point,
+                    unsigned long long *suspend_point,
+                    unsigned long long *reshape_completed)
+{
+       /* This function is called repeatedly by the reshape manager.
+        * It determines how much progress can safely be made and allows
+        * that progress.
+        * - 'info' identifies the array and particularly records in
+        *    ->reshape_progress the metadata's knowledge of progress
+        *      This is a sector offset from the start of the array
+        *      of the next array block to be relocated.  This number
+        *      may increase from 0 or decrease from array_size, depending
+        *      on the type of reshape that is happening.
+        *    Note that in contrast, 'sync_completed' is a block count of the
+        *    reshape so far.  It gives the distance between the start point
+        *    (head or tail of device) and the next place that data will be
+        *    written.  It always increases.
+        * - 'reshape' is the structure created by analyse_change
+        * - 'backup_point' shows how much the metadata manager has backed-up
+        *   data.  For reshapes with increasing progress, it is the next address
+        *   to be backed up, previous addresses have been backed-up.  For
+        *   decreasing progress, it is the earliest address that has been
+        *   backed up - later addresses are also backed up.
+        *   So addresses between reshape_progress and backup_point are
+        *   backed up providing those are in the 'correct' order.
+        * - 'wait_point' is an array address.  When reshape_completed
+        *   passes this point, progress_reshape should return.  It might
+        *   return earlier if it determines that ->reshape_progress needs
+        *   to be updated or further backup is needed.
+        * - suspend_point is maintained by progress_reshape and the caller
+        *   should not touch it except to initialise to zero.
+        *   It is an array address and it only increases in 2.6.37 and earlier.
+        *   This makes it difficult to handle reducing reshapes with
+        *   external metadata.
+        *   However:  it is similar to backup_point in that it records the
+        *     other end of a suspended region from  reshape_progress.
+        *     it is moved to extend the region that is safe to backup and/or
+        *     reshape
+        * - reshape_completed is read from sysfs and returned.  The caller
+        *   should copy this into ->reshape_progress when it has reason to
+        *   believe that the metadata knows this, and any backup outside this
+        *   has been erased.
+        *
+        * Return value is:
+        *   1 if more data from backup_point - but only as far as suspend_point,
+        *     should be backed up
+        *   0 if things are progressing smoothly
+        *  -1 if the reshape is finished because it is all done
+        *  -2 if the reshape was aborted, or the array is being shut down
+        */
+
+       int advancing = (reshape->after.data_disks
+                        >= reshape->before.data_disks);
+       unsigned long long need_backup; /* All data between start of array and
+                                        * here will at some point need to
+                                        * be backed up.
+                                        */
+       unsigned long long read_offset, write_offset;
+       unsigned long long write_range;
+       unsigned long long max_progress, target, completed;
+       unsigned long long array_size = (info->component_size
+                                        * reshape->before.data_disks);
+       int fd;
+       char buf[20];
+
+       /* First, we unsuspend any region that is now known to be safe.
+        * If suspend_point is on the 'wrong' side of reshape_progress, then
+        * we don't have or need suspension at the moment.  This is true for
+        * native metadata when we don't need to back-up.
+        */
+       if (advancing) {
+               if (info->reshape_progress <= *suspend_point)
+                       sysfs_set_num(info, NULL, "suspend_lo",
+                                     info->reshape_progress);
+       } else {
+               /* Note: this won't work in 2.6.37 and before.
+                * Something somewhere should make sure we don't need it!
+                */
+               if (info->reshape_progress >= *suspend_point)
+                       sysfs_set_num(info, NULL, "suspend_hi",
+                                     info->reshape_progress);
+       }
+
+       /* Now work out how far it is safe to progress.
+        * If the read_offset for ->reshape_progress is less than
+        * 'blocks' beyond the write_offset, we can only progress as far
+        * as a backup.
+        * Otherwise we can progress until the write_offset for the new location
+        * reaches (within 'blocks' of) the read_offset at the current location.
+        * However that region must be suspended unless we are using native
+        * metadata.
+        * If we need to suspend more, we limit it to 128M per device, which is
+        * rather arbitrary and should be some time-based calculation.
+        */
+       read_offset = info->reshape_progress / reshape->before.data_disks;
+       write_offset = info->reshape_progress / reshape->after.data_disks;
+       write_range = info->new_chunk/512;
+       if (reshape->before.data_disks == reshape->after.data_disks)
+               need_backup = array_size;
+       else
+               need_backup = reshape->backup_blocks;
+       if (advancing) {
+               if (read_offset < write_offset + write_range)
+                       max_progress = backup_point;
+               else
+                       max_progress =
+                               read_offset *
+                               reshape->after.data_disks;
+       } else {
+               if (read_offset > write_offset - write_range)
+                       /* Can only progress as far as has been backed up,
+                        * which must be suspended */
+                       max_progress = backup_point;
+               else if (info->reshape_progress <= need_backup)
+                       max_progress = backup_point;
+               else {
+                       if (info->array.major_version >= 0)
+                               /* Can progress until backup is needed */
+                               max_progress = need_backup;
+                       else {
+                               /* Can progress until metadata update is required */
+                               max_progress =
+                                       read_offset *
+                                       reshape->after.data_disks;
+                               /* but data must be suspended */
+                               if (max_progress < *suspend_point)
+                                       max_progress = *suspend_point;
+                       }
+               }
+       }
+
+       /* We know it is safe to progress to 'max_progress' providing
+        * it is suspended or we are using native metadata.
+        * Consider extending suspend_point 128M per device if it
+        * is less than 64M per device beyond reshape_progress.
+        * But always do a multiple of 'blocks'
+        * FIXME this is too big - it takes too long to complete
+        * this much.
+        */
+       target = 64*1024*2 * min(reshape->before.data_disks,
+                                 reshape->after.data_disks);
+       target /= reshape->backup_blocks;
+       if (target < 2)
+               target = 2;
+       target *= reshape->backup_blocks;
+
+       /* For externally managed metadata we always need to suspend IO to
+        * the area being reshaped so we regularly push suspend_point forward.
+        * For native metadata we only need the suspend if we are going to do
+        * a backup.
+        */
+       if (advancing) {
+               if ((need_backup > info->reshape_progress
+                    || info->array.major_version < 0) &&
+                   *suspend_point < info->reshape_progress + target) {
+                       if (need_backup < *suspend_point + 2 * target)
+                               *suspend_point = need_backup;
+                       else if (*suspend_point + 2 * target < array_size)
+                               *suspend_point += 2 * target;
+                       else
+                               *suspend_point = array_size;
+                       sysfs_set_num(info, NULL, "suspend_hi", *suspend_point);
+                       if (max_progress > *suspend_point)
+                               max_progress = *suspend_point;
+               }
+       } else {
+               if (info->array.major_version >= 0) {
+                       /* Only need to suspend when about to backup */
+                       if (info->reshape_progress < need_backup * 2 &&
+                           *suspend_point > 0) {
+                               *suspend_point = 0;
+                               sysfs_set_num(info, NULL, "suspend_lo", 0);
+                               sysfs_set_num(info, NULL, "suspend_hi", need_backup);
+                       }
+               } else {
+                       /* Need to suspend continually */
+                       if (info->reshape_progress < *suspend_point)
+                               *suspend_point = info->reshape_progress;
+                       if (*suspend_point + target < info->reshape_progress)
+                               /* No need to move suspend region yet */;
+                       else {
+                               if (*suspend_point >= 2 * target)
+                                       *suspend_point -= 2 * target;
+                               else
+                                       *suspend_point = 0;
+                               sysfs_set_num(info, NULL, "suspend_lo",
+                                             *suspend_point);
+                       }
+                       if (max_progress < *suspend_point)
+                               max_progress = *suspend_point;
+               }
+       }
+
+       /* now set sync_max to allow that progress. sync_max, like
+        * sync_completed is a count of sectors written per device, so
+        * we find the difference between max_progress and the start point,
+        * and divide that by after.data_disks to get a sync_max
+        * number.
+        * At the same time we convert wait_point to a similar number
+        * for comparing against sync_completed.
+        */
+       /* scale down max_progress to per_disk */
+       max_progress /= reshape->after.data_disks;
+       /* Round to chunk size as some kernels give an erroneously high number */
+       max_progress /= info->new_chunk/512;
+       max_progress *= info->new_chunk/512;
+       /* Limit progress to the whole device */
+       if (max_progress > info->component_size)
+               max_progress = info->component_size;
+       wait_point /= reshape->after.data_disks;
+       if (!advancing) {
+               /* switch from 'device offset' to 'processed block count' */
+               max_progress = info->component_size - max_progress;
+               wait_point = info->component_size - wait_point;
+       }
+
+       sysfs_set_num(info, NULL, "sync_max", max_progress);
+
+       /* Now wait.  If we have already reached the point that we were
+        * asked to wait to, don't wait at all, else wait for any change.
+        * We need to select on 'sync_completed' as that is the place that
+        * notifications happen, but we are really interested in
+        * 'reshape_position'
+        */
+       fd = sysfs_get_fd(info, NULL, "sync_completed");
+       if (fd < 0)
+               goto check_progress;
+
+       if (sysfs_fd_get_ll(fd, &completed) < 0) {
+               close(fd);
+               goto check_progress;
+       }
+       while (completed < max_progress && completed < wait_point) {
+               /* Check that sync_action is still 'reshape' to avoid
+                * waiting forever on a dead array
+                */
+               char action[20];
+               fd_set rfds;
+               if (sysfs_get_str(info, NULL, "sync_action",
+                                 action, 20) <= 0 ||
+                   strncmp(action, "reshape", 7) != 0)
+                       break;
+               /* Some kernels reset 'sync_completed' to zero
+                * before setting 'sync_action' to 'idle'.
+                * So we need these extra tests.
+                */
+               if (completed == 0 && advancing
+                   && info->reshape_progress > 0)
+                       break;
+               if (completed == 0 && !advancing
+                   && info->reshape_progress < (info->component_size
+                                                * reshape->after.data_disks))
+                       break;
+               FD_ZERO(&rfds);
+               FD_SET(fd, &rfds);
+               select(fd+1, NULL, NULL, &rfds, NULL);
+               if (sysfs_fd_get_ll(fd, &completed) < 0) {
+                       close(fd);
+                       goto check_progress;
+               }
+       }
+       /* some kernels can give an incorrectly high 'completed' number */
+       completed /= (info->new_chunk/512);
+       completed *= (info->new_chunk/512);
+       /* Convert 'completed' back in to a 'progress' number */
+       completed *= reshape->after.data_disks;
+       if (!advancing) {
+               completed = info->component_size * reshape->after.data_disks
+                       - completed;
+       }
+       *reshape_completed = completed;
+       
+       close(fd);
+
+       /* We return the need_backup flag.  Caller will decide
+        * how much - a multiple of ->backup_blocks up to *suspend_point
+        */
+       if (advancing)
+               return need_backup > info->reshape_progress;
+       else
+               return need_backup >= info->reshape_progress;
+
+check_progress:
+       /* if we couldn't read a number from sync_completed, then
+        * either the reshape did complete, or it aborted.
+        * We can tell which by checking for 'none' in reshape_position.
+        */
+       strcpy(buf, "hi");
+       if (sysfs_get_str(info, NULL, "reshape_position", buf, sizeof(buf)) < 0
+           || strncmp(buf, "none", 4) != 0)
+               return -2; /* abort */
+       else {
+               /* Maybe racing with array shutdown - check state */
+               if (sysfs_get_str(info, NULL, "array_state", buf, sizeof(buf)) < 0
+                   || strncmp(buf, "inactive", 8) == 0
+                   || strncmp(buf, "clear",5) == 0)
+                       return -2; /* abort */
+               return -1; /* complete */
+       }
+}
+
+
 /* FIXME return status is never checked */
 static int grow_backup(struct mdinfo *sra,
                unsigned long long offset, /* per device */
-               unsigned long stripes, /* per device */
+               unsigned long stripes, /* per device, in old chunks */
                int *sources, unsigned long long *offsets,
                int disks, int chunk, int level, int layout,
                int dests, int *destfd, unsigned long long *destoffsets,
@@ -2193,9 +2530,10 @@ static int grow_backup(struct mdinfo *sra,
                odata--;
        if (level == 6)
                odata--;
-       sysfs_set_num(sra, NULL, "suspend_hi", (offset + stripes * (chunk/512)) * odata);
+
        /* Check that array hasn't become degraded, else we might backup the wrong data */
-       sysfs_get_ll(sra, NULL, "degraded", &ll);
+       if (sysfs_get_ll(sra, NULL, "degraded", &ll) < 0)
+               return -1; /* FIXME this error is ignored */
        new_degraded = (int)ll;
        if (new_degraded != *degraded) {
                /* check each device to ensure it is still working */
@@ -2283,46 +2621,16 @@ static int grow_backup(struct mdinfo *sra,
  * every works.
  */
 /* FIXME return value is often ignored */
-static int wait_backup(struct mdinfo *sra,
-               unsigned long long offset, /* per device */
-               unsigned long long blocks, /* per device */
-               unsigned long long blocks2, /* per device - hack */
+static int forget_backup(
                int dests, int *destfd, unsigned long long *destoffsets,
                int part)
 {
-       /* Wait for resync to pass the section that was backed up
-        * then erase the backup and allow IO
+       /* 
+        * Erase backup 'part' (which is 0 or 1)
         */
-       int fd = sysfs_get_fd(sra, NULL, "sync_completed");
-       unsigned long long completed;
        int i;
        int rv;
 
-       if (fd < 0)
-               return -1;
-       sysfs_set_num(sra, NULL, "sync_max", offset + blocks + blocks2);
-
-       if (sysfs_fd_get_ll(fd, &completed) < 0) {
-               close(fd);
-               return -1;
-       }
-       while (completed < offset + blocks) {
-               char action[20];
-               fd_set rfds;
-               FD_ZERO(&rfds);
-               FD_SET(fd, &rfds);
-               select(fd+1, NULL, NULL, &rfds, NULL);
-               if (sysfs_fd_get_ll(fd, &completed) < 0) {
-                       close(fd);
-                       return -1;
-               }
-               if (sysfs_get_str(sra, NULL, "sync_action",
-                                 action, 20) > 0 &&
-                   strncmp(action, "reshape", 7) != 0)
-                       break;
-       }
-       close(fd);
-
        if (part) {
                bsb.arraystart2 = __cpu_to_le64(0);
                bsb.length2 = __cpu_to_le64(0);
@@ -2442,130 +2750,191 @@ static void validate(int afd, int bfd, unsigned long long offset)
        }
 }
 
-static int child_grow(int afd, struct mdinfo *sra, unsigned long stripes,
-                     int *fds, unsigned long long *offsets,
-                     int disks, int chunk, int level, int layout, int data,
-                     int dests, int *destfd, unsigned long long *destoffsets)
+int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
+                 struct supertype *st, unsigned long blocks,
+                 int *fds, unsigned long long *offsets,
+                 int dests, int *destfd, unsigned long long *destoffsets)
 {
+       /* Monitor a reshape where backup is being performed using
+        * 'native' mechanism - either to a backup file, or
+        * to some space in a spare.
+        */
        char *buf;
-       int degraded = 0;
+       int degraded = -1;
+       unsigned long long speed;
+       unsigned long long suspend_point, array_size;
+       unsigned long long backup_point, wait_point;
+       unsigned long long reshape_completed;
+       int done = 0;
+       int increasing = reshape->after.data_disks >= reshape->before.data_disks;
+       int part = 0; /* The next part of the backup area to fill.  It may already
+                      * be full, so we need to check */
+       int level = reshape->level;
+       int layout = reshape->before.layout;
+       int data = reshape->before.data_disks;
+       int disks = reshape->before.data_disks + reshape->parity;
+       int chunk = sra->array.chunk_size;
+       struct mdinfo *sd;
+       unsigned long stripes;
 
-       if (posix_memalign((void**)&buf, 4096, disks * chunk))
-               /* Don't start the 'reshape' */
+       /* set up the backup-super-block.  This requires the
+        * uuid from the array.
+        */
+       /* Find a superblock */
+       for (sd = sra->devs; sd; sd = sd->next) {
+               char *dn;
+               int devfd;
+               int ok;
+               if (sd->disk.state & (1<<MD_DISK_FAULTY))
+                       continue;
+               dn = map_dev(sd->disk.major, sd->disk.minor, 1);
+               devfd = dev_open(dn, O_RDONLY);
+               if (devfd < 0)
+                       continue;
+               ok = st->ss->load_super(st, devfd, NULL);
+               close(devfd);
+               if (ok >= 0)
+                       break;
+       }
+       if (!sd) {
+               fprintf(stderr, Name ": Cannot find a superblock\n");
                return 0;
-       grow_backup(sra, 0, stripes,
-                   fds, offsets, disks, chunk, level, layout,
-                   dests, destfd, destoffsets,
-                   0, &degraded, buf);
-       validate(afd, destfd[0], destoffsets[0]);
-       wait_backup(sra, 0, stripes * (chunk / 512), stripes * (chunk / 512),
-                   dests, destfd, destoffsets,
-                   0);
-       sysfs_set_num(sra, NULL, "suspend_lo", (stripes * (chunk/512)) * data);
-       free(buf);
-       /* FIXME this should probably be numeric */
-       sysfs_set_str(sra, NULL, "sync_max", "max");
-       return 1;
-}
+       }
 
-static int child_shrink(int afd, struct mdinfo *sra, unsigned long stripes,
-                       int *fds, unsigned long long *offsets,
-                       int disks, int chunk, int level, int layout, int data,
-                       int dests, int *destfd, unsigned long long *destoffsets)
-{
-       char *buf;
-       unsigned long long start;
-       int rv;
-       int degraded = 0;
+       memset(&bsb, 0, 512);
+       memcpy(bsb.magic, "md_backup_data-1", 16);
+       st->ss->uuid_from_super(st, (int*)&bsb.set_uuid);
+       bsb.mtime = __cpu_to_le64(time(0));
+       bsb.devstart2 = blocks;
+
+       stripes = blocks / (sra->array.chunk_size/512) /
+               reshape->before.data_disks;
 
        if (posix_memalign((void**)&buf, 4096, disks * chunk))
+               /* Don't start the 'reshape' */
                return 0;
-       start = sra->component_size - stripes * (chunk/512);
-       sysfs_set_num(sra, NULL, "sync_max", start);
-       rv = wait_backup(sra, 0, start - stripes * (chunk/512), stripes * (chunk/512),
-                        dests, destfd, destoffsets, 0);
-       if (rv < 0)
-               return 0;
-       grow_backup(sra, 0, stripes,
-                   fds, offsets,
-                   disks, chunk, level, layout,
-                   dests, destfd, destoffsets,
-                   0, &degraded, buf);
-       validate(afd, destfd[0], destoffsets[0]);
-       wait_backup(sra, start, stripes*(chunk/512), 0,
-                   dests, destfd, destoffsets, 0);
-       sysfs_set_num(sra, NULL, "suspend_lo", (stripes * (chunk/512)) * data);
-       free(buf);
-       /* FIXME this should probably be numeric */
-       sysfs_set_str(sra, NULL, "sync_max", "max");
-       return 1;
-}
+       if (reshape->before.data_disks == reshape->after.data_disks) {
+               sysfs_get_ll(sra, NULL, "sync_speed_min", &speed);
+               sysfs_set_num(sra, NULL, "sync_speed_min", 200000);
+       }
 
-static int child_same_size(int afd, struct mdinfo *sra, unsigned long stripes,
-                          int *fds, unsigned long long *offsets,
-                          unsigned long long start,
-                          int disks, int chunk, int level, int layout, int data,
-                          int dests, int *destfd, unsigned long long *destoffsets)
-{
-       unsigned long long size;
-       unsigned long tailstripes = stripes;
-       int part;
-       char *buf;
-       unsigned long long speed;
-       int degraded = 0;
+       if (increasing) {
+               array_size = sra->component_size * reshape->after.data_disks;
+               backup_point = sra->reshape_progress;
+               suspend_point = 0;
+       } else {
+               array_size = sra->component_size * reshape->before.data_disks;
+               backup_point = reshape->backup_blocks;
+               suspend_point = array_size;
+       }
+
+       while (!done) {
+               int rv;
 
+               /* Want to return as soon as the oldest backup slot can
+                * be released as that allows us to start backing up
+                * some more, providing suspend_point has been
+                * advanced, which it should have.
+                */
+               if (increasing) {
+                       wait_point = array_size;
+                       if (part == 0 && __le64_to_cpu(bsb.length) > 0)
+                               wait_point = (__le64_to_cpu(bsb.arraystart) +
+                                             __le64_to_cpu(bsb.length));
+                       if (part == 1 && __le64_to_cpu(bsb.length2) > 0)
+                               wait_point = (__le64_to_cpu(bsb.arraystart2) +
+                                             __le64_to_cpu(bsb.length2));
+               } else {
+                       wait_point = 0;
+                       if (part == 0 && __le64_to_cpu(bsb.length) > 0)
+                               wait_point = __le64_to_cpu(bsb.arraystart);
+                       if (part == 1 && __le64_to_cpu(bsb.length2) > 0)
+                               wait_point = __le64_to_cpu(bsb.arraystart2);
+               }
+
+               rv = progress_reshape(sra, reshape,
+                                     backup_point, wait_point,
+                                     &suspend_point, &reshape_completed);
+               /* external metadata would need to ping_monitor here */
+               sra->reshape_progress = reshape_completed;
+
+               /* Clear any backup region that is before 'here' */
+               if (increasing) {
+                       if (reshape_completed >= (__le64_to_cpu(bsb.arraystart) +
+                                                 __le64_to_cpu(bsb.length)))
+                               forget_backup(dests, destfd,
+                                             destoffsets, 0);
+                       if (reshape_completed >= (__le64_to_cpu(bsb.arraystart2) +
+                                                 __le64_to_cpu(bsb.length2)))
+                               forget_backup(dests, destfd,
+                                             destoffsets, 1);
+               } else {
+                       if (reshape_completed <= (__le64_to_cpu(bsb.arraystart)))
+                               forget_backup(dests, destfd,
+                                             destoffsets, 0);
+                       if (reshape_completed <= (__le64_to_cpu(bsb.arraystart2)))
+                               forget_backup(dests, destfd,
+                                             destoffsets, 1);
+               }
 
-       if (posix_memalign((void**)&buf, 4096, disks * chunk))
-               return 0;
+               if (rv < 0) {
+                       if (rv == -1)
+                               done = 1;
+                       break;
+               }
 
-       sysfs_get_ll(sra, NULL, "sync_speed_min", &speed);
-       sysfs_set_num(sra, NULL, "sync_speed_min", 200000);
-
-       grow_backup(sra, start, stripes,
-                   fds, offsets,
-                   disks, chunk, level, layout,
-                   dests, destfd, destoffsets,
-                   0, &degraded, buf);
-       grow_backup(sra, (start + stripes) * (chunk/512), stripes,
-                   fds, offsets,
-                   disks, chunk, level, layout,
-                   dests, destfd, destoffsets,
-                   1, &degraded, buf);
-       validate(afd, destfd[0], destoffsets[0]);
-       part = 0;
-       start += stripes * 2; /* where to read next */
-       size = sra->component_size / (chunk/512);
-       while (start < size) {
-               if (wait_backup(sra, (start-stripes*2)*(chunk/512),
-                               stripes*(chunk/512), 0,
-                               dests, destfd, destoffsets,
-                               part) < 0)
-                       return 0;
-               sysfs_set_num(sra, NULL, "suspend_lo", start*(chunk/512) * data);
-               if (start + stripes > size)
-                       tailstripes = (size - start);
-
-               grow_backup(sra, start*(chunk/512), tailstripes,
-                           fds, offsets,
-                           disks, chunk, level, layout,
-                           dests, destfd, destoffsets,
-                           part, &degraded, buf);
-               start += stripes;
-               part = 1 - part;
-               validate(afd, destfd[0], destoffsets[0]);
-       }
-       if (wait_backup(sra, (start-stripes*2) * (chunk/512), stripes * (chunk/512), 0,
-                       dests, destfd, destoffsets,
-                       part) < 0)
-               return 0;
-       sysfs_set_num(sra, NULL, "suspend_lo", ((start-stripes)*(chunk/512)) * data);
-       wait_backup(sra, (start-stripes) * (chunk/512), tailstripes * (chunk/512), 0,
-                   dests, destfd, destoffsets,
-                   1-part);
-       sysfs_set_num(sra, NULL, "suspend_lo", (size*(chunk/512)) * data);
-       sysfs_set_num(sra, NULL, "sync_speed_min", speed);
+               while (rv) {
+                       unsigned long long offset;
+                       unsigned long actual_stripes;
+                       /* Need to backup some data.
+                        * If 'part' is not used and the desired
+                        * backup size is suspended, do a backup,
+                        * then consider the next part.
+                        */
+                       /* Check that 'part' is unused */
+                       if (part == 0 && __le64_to_cpu(bsb.length) != 0)
+                               break;
+                       if (part == 1 && __le64_to_cpu(bsb.length2) != 0)
+                               break;
+
+                       offset = backup_point / data;
+                       actual_stripes = stripes;
+                       if (increasing) {
+                               if (offset + actual_stripes * (chunk/512) >
+                                   sra->component_size)
+                                       actual_stripes = ((sra->component_size - offset)
+                                                         / (chunk/512));
+                               if (offset + actual_stripes * (chunk/512) >
+                                   suspend_point/data)
+                                       break;
+                       } else {
+                               if (offset < actual_stripes * (chunk/512))
+                                       actual_stripes = offset / (chunk/512);
+                               offset -= actual_stripes * (chunk/512);
+                               if (offset < suspend_point/data)
+                                       break;
+                       }
+                       grow_backup(sra, offset, actual_stripes,
+                                   fds, offsets,
+                                   disks, chunk, level, layout,
+                                   dests, destfd, destoffsets,
+                                   part, &degraded, buf);
+                       validate(afd, destfd[0], destoffsets[0]);
+                       /* record where 'part' is up to */
+                       part = !part;
+                       if (increasing)
+                               backup_point += actual_stripes * (chunk/512) * data;
+                       else
+                               backup_point -= actual_stripes * (chunk/512) * data;
+               }
+       }
+
+       /* FIXME maybe call progress_reshape one more time instead */
+       abort_reshape(sra); /* remove any remaining suspension */
+       if (reshape->before.data_disks == reshape->after.data_disks)
+               sysfs_set_num(sra, NULL, "sync_speed_min", speed);
        free(buf);
-       return 1;
+       return done;
 }
 
 /*
@@ -2602,6 +2971,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                int fd;
                int bsbsize;
                char *devname, namebuf[20];
+               unsigned long long lo, hi;
 
                /* This was a spare and may have some saved data on it.
                 * Load the superblock, find and load the
@@ -2685,42 +3055,52 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                }
 
                if (bsb.magic[15] == '1') {
-               if (info->delta_disks >= 0) {
-                       /* reshape_progress is increasing */
-                       if (__le64_to_cpu(bsb.arraystart) + __le64_to_cpu(bsb.length) <
-                           info->reshape_progress) {
-                       nonew:
-                               if (verbose)
-                                       fprintf(stderr, Name ": backup-metadata found on %s but is not needed\n", devname);
-                               continue; /* No new data here */
+                       if (bsb.length == 0)
+                               continue;
+                       if (info->delta_disks >= 0) {
+                               /* reshape_progress is increasing */
+                               if (__le64_to_cpu(bsb.arraystart)
+                                   + __le64_to_cpu(bsb.length)
+                                   < info->reshape_progress) {
+                               nonew:
+                                       if (verbose)
+                                               fprintf(stderr, Name
+                  ": backup-metadata found on %s but is not needed\n", devname);
+                                       continue; /* No new data here */
+                               }
+                       } else {
+                               /* reshape_progress is decreasing */
+                               if (__le64_to_cpu(bsb.arraystart) >=
+                                   info->reshape_progress)
+                                       goto nonew; /* No new data here */
                        }
                } else {
-                       /* reshape_progress is decreasing */
-                       if (__le64_to_cpu(bsb.arraystart) >=
-                           info->reshape_progress)
-                               goto nonew; /* No new data here */
-               }
-               } else {
-               if (info->delta_disks >= 0) {
-                       /* reshape_progress is increasing */
-                       if (__le64_to_cpu(bsb.arraystart) + __le64_to_cpu(bsb.length) <
-                           info->reshape_progress &&
-                           __le64_to_cpu(bsb.arraystart2) + __le64_to_cpu(bsb.length2) <
-                           info->reshape_progress)
-                               goto nonew; /* No new data here */
-               } else {
-                       /* reshape_progress is decreasing */
-                       if (__le64_to_cpu(bsb.arraystart) >=
-                           info->reshape_progress &&
-                           __le64_to_cpu(bsb.arraystart2) >=
-                           info->reshape_progress)
-                               goto nonew; /* No new data here */
-               }
+                       if (bsb.length == 0 && bsb.length2 == 0)
+                               continue;
+                       if (info->delta_disks >= 0) {
+                               /* reshape_progress is increasing */
+                               if ((__le64_to_cpu(bsb.arraystart)
+                                    + __le64_to_cpu(bsb.length)
+                                    < info->reshape_progress)
+                                   &&
+                                   (__le64_to_cpu(bsb.arraystart2)
+                                    + __le64_to_cpu(bsb.length2)
+                                    < info->reshape_progress))
+                                       goto nonew; /* No new data here */
+                       } else {
+                               /* reshape_progress is decreasing */
+                               if (__le64_to_cpu(bsb.arraystart) >=
+                                   info->reshape_progress &&
+                                   __le64_to_cpu(bsb.arraystart2) >=
+                                   info->reshape_progress)
+                                       goto nonew; /* No new data here */
+                       }
                }
                if (lseek64(fd, __le64_to_cpu(bsb.devstart)*512, 0)< 0) {
                second_fail:
                        if (verbose)
-                               fprintf(stderr, Name ": Failed to verify secondary backup-metadata block on %s\n",
+                               fprintf(stderr, Name
+                    ": Failed to verify secondary backup-metadata block on %s\n",
                                        devname);
                        continue; /* Cannot seek */
                }
@@ -2784,7 +3164,28 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
 
                /* Ok, so the data is restored. Let's update those superblocks. */
 
-               if (info->delta_disks >= 0) {
+               lo = hi = 0;
+               if (bsb.length) {
+                       lo = __le64_to_cpu(bsb.arraystart);
+                       hi = lo + __le64_to_cpu(bsb.length);
+               }
+               if (bsb.magic[15] == '2' && bsb.length2) {
+                       unsigned long long lo1, hi1;
+                       lo1 = __le64_to_cpu(bsb.arraystart2);
+                       hi1 = lo1 + __le64_to_cpu(bsb.length2);
+                       if (lo == hi) {
+                               lo = lo1;
+                               hi = hi1;
+                       } else if (lo < lo1)
+                               hi = hi1;
+                       else
+                               lo = lo1;
+               }
+               if (lo < hi &&
+                   (info->reshape_progress < lo ||
+                    info->reshape_progress > hi))
+                       /* backup does not affect reshape_progress*/ ;
+               else if (info->delta_disks >= 0) {
                        info->reshape_progress = __le64_to_cpu(bsb.arraystart) +
                                __le64_to_cpu(bsb.length);
                        if (bsb.magic[15] == '2') {
@@ -2859,164 +3260,10 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
 int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
                  char *backup_file)
 {
-       /* Array is assembled and ready to be started, but
-        * monitoring is probably required.
-        * So:
-        *   - start read-only
-        *   - set upper bound for resync
-        *   - initialise the 'suspend' boundaries
-        *   - switch to read-write
-        *   - fork and continue monitoring
-        */
-       int err;
-       int backup_list[1];
-       unsigned long long backup_offsets[1];
-       int odisks, ndisks, ochunk, nchunk,odata,ndata;
-       unsigned long a,b,blocks,stripes;
-       int backup_fd;
-       int *fds;
-       unsigned long long *offsets;
-       int d;
-       struct mdinfo *sra, *sd;
-       int rv;
-       unsigned long cache;
-       int done = 0;
-
-       err = sysfs_set_str(info, NULL, "array_state", "readonly");
+       int err = sysfs_set_str(info, NULL, "array_state", "readonly");
        if (err)
                return err;
-
-       /* make sure reshape doesn't progress until we are ready */
-       sysfs_set_str(info, NULL, "sync_max", "0");
-       sysfs_set_str(info, NULL, "array_state", "active"); /* FIXME or clean */
-
-       sra = sysfs_read(-1, devname2devnum(info->sys_name),
-                        GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
-                        GET_CACHE);
-       if (!sra)
-               return 1;
-
-       /* ndisks is not growing, so raid_disks is old and +delta is new */
-       odisks = info->array.raid_disks;
-       ndisks = odisks + info->delta_disks;
-       odata = odisks - 1;
-       ndata = ndisks - 1;
-       if (info->array.level == 6) {
-               odata--;
-               ndata--;
-       }
-       ochunk = info->array.chunk_size;
-       nchunk = info->new_chunk;
-
-       a = (ochunk/512) * odata;
-       b = (nchunk/512) * ndata;
-       /* Find GCD */
-       while (a != b) {
-               if (a < b)
-                       b -= a;
-               if (b < a)
-                       a -= b;
-       }
-       /* LCM == product / GCD */
-       blocks = (ochunk/512) * (nchunk/512) * odata * ndata / a;
-
-       if (ndata == odata)
-               while (blocks * 32 < sra->component_size &&
-                      blocks < 16*1024*2)
-                       blocks *= 2;
-       stripes = blocks / (info->array.chunk_size/512) / odata;
-
-       /* check that the internal stripe cache is
-        * large enough, or it won't work.
-        */
-       cache = (nchunk < ochunk) ? ochunk : nchunk;
-       cache = cache * 4 / 4096;
-       if (cache < blocks / 8 / odisks + 16)
-               /* Make it big enough to hold 'blocks' */
-               cache = blocks / 8 / odisks + 16;
-       if (sra->cache_size < cache)
-               sysfs_set_num(sra, NULL, "stripe_cache_size",
-                             cache+1);
-
-       memset(&bsb, 0, 512);
-       memcpy(bsb.magic, "md_backup_data-1", 16);
-       memcpy(&bsb.set_uuid, info->uuid, 16);
-       bsb.mtime = __cpu_to_le64(time(0));
-       bsb.devstart2 = blocks;
-
-       backup_fd = open(backup_file, O_RDWR|O_CREAT, S_IRUSR | S_IWUSR);
-       if (backup_fd < 0) {
-               fprintf(stderr, Name ": Cannot open backup file %s\n",
-                       backup_file ?: "- no backup-file given");
-               return 1;
-       }
-       backup_list[0] = backup_fd;
-       backup_offsets[0] = 8 * 512;
-       fds = malloc(odisks * sizeof(fds[0]));
-       offsets = malloc(odisks * sizeof(offsets[0]));
-       for (d=0; d<odisks; d++)
-               fds[d] = -1;
-
-       for (sd = sra->devs; sd; sd = sd->next) {
-               if (sd->disk.state & (1<<MD_DISK_FAULTY))
-                       continue;
-               if (sd->disk.state & (1<<MD_DISK_SYNC)) {
-                       char *dn = map_dev(sd->disk.major,
-                                          sd->disk.minor, 1);
-                       fds[sd->disk.raid_disk]
-                               = dev_open(dn, O_RDONLY);
-                       offsets[sd->disk.raid_disk] = sd->data_offset*512;
-                       if (fds[sd->disk.raid_disk] < 0) {
-                               fprintf(stderr, Name ": %s: cannot open component %s\n",
-                                       info->sys_name, dn?dn:"-unknown-");
-                               rv = 1;
-                               goto release;
-                       }
-                       free(dn);
-               }
-       }
-
-       switch(fork()) {
-       case 0:
-               close(mdfd);
-               mlockall(MCL_FUTURE);
-               if (info->delta_disks < 0)
-                       done = child_shrink(-1, info, stripes,
-                                           fds, offsets,
-                                           info->array.raid_disks,
-                                           info->array.chunk_size,
-                                           info->array.level, info->array.layout,
-                                           odata,
-                                           1, backup_list, backup_offsets);
-               else if (info->delta_disks == 0) {
-                       /* The 'start' is a per-device stripe number.
-                        * reshape_progress is a per-array sector number.
-                        * So divide by ndata * chunk_size
-                        */
-                       unsigned long long start = info->reshape_progress / ndata;
-                       start /= (info->array.chunk_size/512);
-                       done = child_same_size(-1, info, stripes,
-                                              fds, offsets,
-                                              start,
-                                              info->array.raid_disks,
-                                              info->array.chunk_size,
-                                              info->array.level, info->array.layout,
-                                              odata,
-                                              1, backup_list, backup_offsets);
-               }
-               if (backup_file && done)
-                       unlink(backup_file);
-               /* FIXME should I intuit a level change */
-               exit(0);
-       case -1:
-               fprintf(stderr, Name ": Cannot run child to continue monitoring reshape: %s\n",
-                       strerror(errno));
-               return 1;
-       default:
-               break;
-       }
-release:
-       return 0;
+       return reshape_array(NULL, mdfd, "array", st, info, 1, backup_file, 0, 0, 1);
 }