]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Grow.c
Don't complain about missing spares when reshaping a raid0.
[thirdparty/mdadm.git] / Grow.c
diff --git a/Grow.c b/Grow.c
index 25c55ccba6938ebf65cc4122acd9142fa4f85e62..109b0c19a56b0b94229e921e4d3ddc4f2e35f99e 100644 (file)
--- a/Grow.c
+++ b/Grow.c
@@ -634,6 +634,7 @@ static int subarray_set_num(char *container, struct mdinfo *sra, char *name, int
 int start_reshape(struct mdinfo *sra)
 {
        int err;
+       sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
        err = sysfs_set_num(sra, NULL, "suspend_hi", 0);
        err = err ?: sysfs_set_num(sra, NULL, "suspend_lo", 0);
        err = err ?: sysfs_set_num(sra, NULL, "sync_min", 0);
@@ -866,30 +867,6 @@ unsigned long compute_backup_blocks(int nchunk, int ochunk,
        return blocks;
 }
 
-/* 'struct reshape' records the intermediate states
- * a general reshape.
- * The starting geometry is converted to the 'before' geometry
- * by at most an atomic level change. They could be the same.
- * Similarly the 'after' geometry is converted to the final
- * geometry by at most a level change.
- * Note that 'before' and 'after' must have the same level.
- * 'blocks' is the minimum number of sectors for a reshape unit.
- * This will be a multiple of the stripe size in each of the
- * 'before' and 'after' geometries.
- * If 'blocks' is 0, no restriping is necessary.
- */
-struct reshape {
-       int level;
-       int parity; /* number of parity blocks/devices */
-       struct {
-               int layout;
-               int data_disks;
-       } before, after;
-       unsigned long long blocks;
-       unsigned long long stripes; /* number of old stripes that comprise 'blocks'*/
-       unsigned long long new_size; /* New size of array in sectors */
-};
-
 char *analyse_change(struct mdinfo *info, struct reshape *re)
 {
        /* Based on the current array state in info->array and
@@ -946,7 +923,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                        re->before.data_disks = (info->array.raid_disks +
                                                 info->delta_disks);
                        re->before.layout = 0;
-                       re->blocks = 0;
+                       re->backup_blocks = 0;
                        re->parity = 0;
                        return NULL;
                }
@@ -957,7 +934,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                        re->parity = 1;
                        re->before.data_disks = 1;
                        re->before.layout = ALGORITHM_LEFT_SYMMETRIC;
-                       re->blocks = 0;
+                       re->backup_blocks = 0;
                        return NULL;
                }
                /* Could do some multi-stage conversions, but leave that to
@@ -994,7 +971,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                re->parity = 0;
                re->before.data_disks = new_disks;
                re->before.layout = 0;
-               re->blocks = 0;
+               re->backup_blocks = 0;
                return NULL;
 
        case 0:
@@ -1031,7 +1008,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                        re->before.data_disks = (info->array.raid_disks +
                                                 info->delta_disks);
                        re->before.layout = info->new_layout;
-                       re->blocks = 0;
+                       re->backup_blocks = 0;
                        return NULL;
                }
 
@@ -1189,10 +1166,9 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                                re->after.layout = info->new_layout;
                        break;
                case 6:
-                       if (info->new_layout == UnSet) {
-                               re->after.layout = re->before.layout;
-                               break;
-                       }
+                       if (info->new_layout == UnSet)
+                               info->new_layout = re->before.layout;
+
                        /* after.layout needs to be raid6 version of new_layout */
                        if (info->new_layout == ALGORITHM_PARITY_N)
                                re->after.layout = ALGORITHM_PARITY_N;
@@ -1220,7 +1196,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                        re->after.data_disks = (info->array.raid_disks +
                                                info->delta_disks) - 2;
                if (info->new_layout == UnSet)
-                       re->after.layout = re->before.layout;
+                       re->after.layout = info->array.layout;
                else
                        re->after.layout = info->new_layout;
                break;
@@ -1242,12 +1218,12 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
            re->after.layout == re->before.layout &&
            info->new_chunk == info->array.chunk_size) {
                /* Nothing to change */
-               re->blocks = 0;
+               re->backup_blocks = 0;
                return NULL;
        }
        if (re->after.data_disks == 1 && re->before.data_disks == 1) {
                /* chunks can layout changes make no difference */
-               re->blocks = 0;
+               re->backup_blocks = 0;
                return NULL;
        }
 
@@ -1259,7 +1235,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
            get_linux_version() < 2006030)
                return "reshape to fewer devices is not supported before 2.6.32 - sorry.";
 
-       re->blocks = compute_backup_blocks(
+       re->backup_blocks = compute_backup_blocks(
                info->new_chunk, info->array.chunk_size,
                re->after.data_disks,
                re->before.data_disks);
@@ -1277,11 +1253,6 @@ static int reshape_container(char *container, int cfd, char *devname,
                             int force,
                             char *backup_file,
                             int quiet);
-static int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
-                        unsigned long stripes,
-                        int *fds, unsigned long long *offsets,
-                        int dests, int *destfd, unsigned long long *destoffsets);
-
 
 int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                 long long size,
@@ -1595,6 +1566,8 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                rv = reshape_array(container, fd, devname, st, &info, force,
                                   backup_file, quiet, 0);
        }
+       /* reshape_* released the array */
+       return rv;
 release:
        unfreeze(st, frozen);
        return rv;
@@ -1621,11 +1594,11 @@ static int reshape_array(char *container, int fd, char *devname,
        int nrdisks;
        int err;
        int frozen;
-       unsigned long blocks, stripes;
+       unsigned long blocks;
        unsigned long cache;
        unsigned long long array_size;
        int done;
-       struct mdinfo *sra, *sd;
+       struct mdinfo *sra;
 
        msg = analyse_change(info, &reshape);
        if (msg) {
@@ -1640,7 +1613,9 @@ static int reshape_array(char *container, int fd, char *devname,
                            reshape.after.data_disks)
                + reshape.parity - array.raid_disks;
 
-       if (!force && spares_needed < info->array.spare_disks) {
+       if (!force &&
+           info->new_level > 0 &&
+           spares_needed > info->array.spare_disks) {
                fprintf(stderr,
                        Name ": Need %d spare%s to avoid degraded array,"
                        " and only have %d.\n"
@@ -1669,7 +1644,7 @@ static int reshape_array(char *container, int fd, char *devname,
                        return 1;
                }
                if (!quiet)
-                       fprintf(stderr, Name " level of %s changed to %s\n",
+                       fprintf(stderr, Name ": level of %s changed to %s\n",
                                devname, c);    
                orig_level = info->array.level;
        }
@@ -1704,7 +1679,7 @@ static int reshape_array(char *container, int fd, char *devname,
                sysfs_free(info2);
        }
 
-       if (reshape.blocks == 0) {
+       if (reshape.backup_blocks == 0) {
                /* No restriping needed, but we might need to impose
                 * some more changes: layout, raid_disks, chunk_size
                 */
@@ -1789,7 +1764,7 @@ static int reshape_array(char *container, int fd, char *devname,
        }
 
        sra = sysfs_read(fd, 0,
-                        GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE||GET_CHUNK|
+                        GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK|
                         GET_CACHE);
 
        if (!sra) {
@@ -1802,7 +1777,7 @@ static int reshape_array(char *container, int fd, char *devname,
        /* Decide how many blocks (sectors) for a reshape
         * unit.  The number we have so far is just a minimum
         */
-       blocks = reshape.blocks;
+       blocks = reshape.backup_blocks;
        if (reshape.before.data_disks == 
            reshape.after.data_disks) {
                /* Make 'blocks' bigger for better throughput, but
@@ -1843,7 +1818,7 @@ static int reshape_array(char *container, int fd, char *devname,
                goto release;
        }
        if (backup_file == NULL) {
-               if (reshape.after.data_disks <= reshape.before.data_disks) {
+               if (reshape.after.data_disks <= reshape.before.data_disks) {
                        fprintf(stderr,
                                Name ": %s: Cannot grow - need backup-file\n", 
                                devname);
@@ -1890,6 +1865,8 @@ static int reshape_array(char *container, int fd, char *devname,
         */
        sync_metadata(st);
 
+       sra->new_chunk = info->new_chunk;
+       
        if (info->array.chunk_size == info->new_chunk &&
            reshape.before.layout == reshape.after.layout &&
            st->ss->external == 0) {
@@ -1940,49 +1917,6 @@ static int reshape_array(char *container, int fd, char *devname,
        }
 
        start_reshape(sra);
-       if (st->ss->external) {
-               /* metadata handler takes it from here */
-               ping_manager(container);
-               st->ss->manage_reshape(st, backup_file);
-               frozen = 0;
-               goto release;
-       }
-
-       /* set up the backup-super-block.  This requires the
-        * uuid from the array.
-        */
-       /* Find a superblock */
-       for (sd = sra->devs; sd; sd = sd->next) {
-               char *dn;
-               int devfd;
-               int ok;
-               if (sd->disk.state & (1<<MD_DISK_FAULTY))
-                       continue;
-               dn = map_dev(sd->disk.major, sd->disk.minor, 1);
-               devfd = dev_open(dn, O_RDONLY);
-               if (devfd < 0)
-                       continue;
-               ok = st->ss->load_super(st, devfd, NULL);
-               close(devfd);
-               if (ok >= 0)
-                       break;
-       }
-       if (!sd) {
-               fprintf(stderr, Name ": %s: Cannot find a superblock\n",
-                       devname);
-               rv = 1;
-               abort_reshape(sra);
-               goto release;
-       }
-
-       memset(&bsb, 0, 512);
-       memcpy(bsb.magic, "md_backup_data-1", 16);
-       st->ss->uuid_from_super(st, (int*)&bsb.set_uuid);
-       bsb.mtime = __cpu_to_le64(time(0));
-       bsb.devstart2 = blocks;
-
-       stripes = reshape.blocks / (info->array.chunk_size/512) /
-               reshape.before.data_disks;
 
        /* Now we just need to kick off the reshape and watch, while
         * handling backups of the data...
@@ -1999,9 +1933,19 @@ static int reshape_array(char *container, int fd, char *devname,
 
                odisks = reshape.before.data_disks + reshape.parity;
 
-               done = child_monitor(fd, sra, &reshape, stripes,
-                                    fdlist, offsets,
-                                    d - odisks, fdlist+odisks, offsets+odisks);
+               if (st->ss->external) {
+                       /* metadata handler takes it from here */
+                       done = st->ss->manage_reshape(
+                               fd, sra, &reshape, st, blocks,
+                               fdlist, offsets,
+                               d - odisks, fdlist+odisks,
+                               offsets+odisks);
+               } else
+                       done = child_monitor(
+                               fd, sra, &reshape, st, blocks,
+                               fdlist, offsets,
+                               d - odisks, fdlist+odisks,
+                               offsets+odisks);
 
                if (backup_file && done)
                        unlink(backup_file);
@@ -2047,7 +1991,7 @@ static int reshape_array(char *container, int fd, char *devname,
                        }
                }
 
-               if (info->new_level != info->array.level) {
+               if (info->new_level != reshape.level) {
                        /* We need to wait for the reshape to finish
                         * (which will have happened unless
                         * odata < ndata) and then set the level
@@ -2058,16 +2002,13 @@ static int reshape_array(char *container, int fd, char *devname,
                                wait_reshape(sra);
 
                        c = map_num(pers, info->new_level);
-                       if (c == NULL) {
-                               if (forked)
-                                       return 1;
-                               exit(0);/* not possible */
-                       }
+                       if (c == NULL)
+                               goto out;/* not possible */
 
                        err = sysfs_set_str(sra, NULL, "level", c);
                        if (err)
                                fprintf(stderr, Name\
-                                       ": %s: could not set level"
+                                       ": %s: could not set level "
                                        "to %s\n", devname, c);
                }
        out:
@@ -2088,19 +2029,17 @@ static int reshape_array(char *container, int fd, char *devname,
 
 
  release:
-       if (rv) {
-               unfreeze(st, frozen);
-               return rv;
-       }
-       if (container)
-               ping_monitor(container);
-       if (st->ss->external) {
-               /* Re-load the metadata as much could have changed */
-               int cfd = open_dev(st->container_dev);
-               if (cfd >= 0) {
-                       st->ss->free_super(st);
-                       st->ss->load_container(st, cfd, container);
-                       close(cfd);
+       if (!rv) {
+               if (container)
+                       ping_monitor(container);
+               if (st->ss->external) {
+                       /* Re-load the metadata as much could have changed */
+                       int cfd = open_dev(st->container_dev);
+                       if (cfd >= 0) {
+                               st->ss->free_super(st);
+                               st->ss->load_container(st, cfd, container);
+                               close(cfd);
+                       }
                }
        }
        if (rv && orig_level != UnSet && sra) {
@@ -2257,7 +2196,7 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
         * - suspend_point is maintained by progress_reshape and the caller
         *   should not touch it except to initialise to zero.
         *   It is an array address and it only increases in 2.6.37 and earlier.
-        *   This makes it difficulty to handle reducing reshapes with
+        *   This makes it difficult to handle reducing reshapes with
         *   external metadata.
         *   However:  it is similar to backup_point in that it records the
         *     other end of a suspended region from  reshape_progress.
@@ -2278,11 +2217,14 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
 
        int advancing = (reshape->after.data_disks
                         >= reshape->before.data_disks);
-       int need_backup = (reshape->after.data_disks
-                          == reshape->before.data_disks);
+       unsigned long long need_backup; /* need to eventually backup all the way
+                                        * to here
+                                        */
        unsigned long long read_offset, write_offset;
-       unsigned long long read_range, write_range;
+       unsigned long long write_range;
        unsigned long long max_progress, target, completed;
+       unsigned long long array_size = (info->component_size
+                                        * reshape->before.data_disks);
        int fd;
 
        /* First, we unsuspend any region that is now known to be safe.
@@ -2291,14 +2233,14 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
         * native metadata when we don't need to back-up.
         */
        if (advancing) {
-               if (info->reshape_progress < *suspend_point)
+               if (info->reshape_progress <= *suspend_point)
                        sysfs_set_num(info, NULL, "suspend_lo",
                                      info->reshape_progress);
        } else {
                /* Note: this won't work in 2.6.37 and before.
                 * Something somewhere should make sure we don't need it!
                 */
-               if (info->reshape_progress > *suspend_point)
+               if (info->reshape_progress >= *suspend_point)
                        sysfs_set_num(info, NULL, "suspend_hi",
                                      info->reshape_progress);
        }
@@ -2314,29 +2256,32 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
         * If we need to suspend more, we limit it to 128M per device, which is
         * rather arbitrary and should be some time-based calculation.
         */
-       write_offset = info->reshape_progress / reshape->before.data_disks;
-       read_offset = info->reshape_progress / reshape->after.data_disks;
-       write_range = reshape->blocks / reshape->before.data_disks;
-       read_range = reshape->blocks / reshape->after.data_disks;
+       read_offset = info->reshape_progress / reshape->before.data_disks;
+       write_offset = info->reshape_progress / reshape->after.data_disks;
+       write_range = info->new_chunk/512;
        if (advancing) {
+               need_backup = 0;
                if (read_offset < write_offset + write_range) {
                        max_progress = backup_point;
-                       if (max_progress <= info->reshape_progress)
-                               need_backup = 1;
+                       if (reshape->before.data_disks == reshape->after.data_disks)
+                               need_backup = array_size;
+                       else
+                               need_backup = reshape->backup_blocks;
                } else {
                        max_progress =
-                               (read_offset - write_range) *
-                               reshape->before.data_disks;
+                               read_offset *
+                               reshape->after.data_disks;
                }
        } else {
+               need_backup = array_size;
                if (read_offset > write_offset - write_range) {
                        max_progress = backup_point;
                        if (max_progress >= info->reshape_progress)
-                               need_backup = 1;
+                               need_backup = 0;
                } else {
                        max_progress =
-                               (read_offset + write_range) *
-                               reshape->before.data_disks;
+                               read_offset *
+                               reshape->after.data_disks;
                        /* If we are using internal metadata, then we can
                         * progress all the way to the suspend_point without
                         * worrying about backing-up/suspending along the
@@ -2353,13 +2298,15 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
         * Consider extending suspend_point 128M per device if it
         * is less than 64M per device beyond reshape_progress.
         * But always do a multiple of 'blocks'
+        * FIXME this is too big - it takes too long to complete
+        * this much.
         */
        target = 64*1024*2 * min(reshape->before.data_disks,
                                  reshape->after.data_disks);
-       target /= reshape->blocks;
+       target /= reshape->backup_blocks;
        if (target < 2)
                target = 2;
-       target *= reshape->blocks;
+       target *= reshape->backup_blocks;
 
        /* For externally managed metadata we always need to suspend IO to
         * the area being reshaped so we regularly push suspend_point forward.
@@ -2367,24 +2314,32 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
         * a backup.
         */
        if (advancing) {
-               if ((need_backup || info->array.major_version < 0) &&
+               if ((need_backup > info->reshape_progress
+                    || info->array.major_version < 0) &&
                    *suspend_point < info->reshape_progress + target) {
-                       if (max_progress < *suspend_point + 2 * target)
-                               *suspend_point = max_progress;
-                       else
+                       if (need_backup < *suspend_point + 2 * target)
+                               *suspend_point = need_backup;
+                       else if (*suspend_point + 2 * target < array_size)
                                *suspend_point += 2 * target;
+                       else
+                               *suspend_point = array_size;
                        sysfs_set_num(info, NULL, "suspend_hi", *suspend_point);
-                       max_progress = *suspend_point;
+                       if (max_progress > *suspend_point)
+                               max_progress = *suspend_point;
                }
        } else {
-               if ((need_backup || info->array.major_version < 0) &&
+               if ((need_backup < info->reshape_progress
+                    || info->array.major_version < 0) &&
                    *suspend_point > info->reshape_progress - target) {
-                       if (max_progress > *suspend_point - 2 * target)
-                               *suspend_point = max_progress;
-                       else
+                       if (need_backup > *suspend_point - 2 * target)
+                               *suspend_point = need_backup;
+                       else if (*suspend_point >= 2 * target)
                                *suspend_point -= 2 * target;
+                       else
+                               *suspend_point = 0;
                        sysfs_set_num(info, NULL, "suspend_lo", *suspend_point);
-                       max_progress = *suspend_point;
+                       if (max_progress < *suspend_point)
+                               max_progress = *suspend_point;
                }
        }
 
@@ -2396,14 +2351,20 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
         * At the same time we convert wait_point to a similar number
         * for comparing against sync_completed.
         */
-       if (!advancing) {
-               max_progress = info->component_size * reshape->after.data_disks
-                       - max_progress;
-               wait_point = info->component_size * reshape->after.data_disks
-                       - wait_point;
-       }
+       /* scale down max_progress to per_disk */
        max_progress /= reshape->after.data_disks;
+       /* Round to chunk size as some kernels give an erroneously high number */
+       max_progress /= info->new_chunk/512;
+       max_progress *= info->new_chunk/512;
+       /* Limit progress to the whole device */
+       if (max_progress > info->component_size)
+               max_progress = info->component_size;
        wait_point /= reshape->after.data_disks;
+       if (!advancing) {
+               /* switch from 'device offset' to 'processed block count' */
+               max_progress = info->component_size - max_progress;
+               wait_point = info->component_size - wait_point;
+       }
 
        sysfs_set_num(info, NULL, "sync_max", max_progress);
 
@@ -2439,6 +2400,9 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
                        return -1;
                }
        }
+       /* some kernels can give an incorrectly high 'completed' number */
+       completed /= (info->new_chunk/512);
+       completed *= (info->new_chunk/512);
        /* Convert 'completed' back in to a 'progress' number */
        completed *= reshape->after.data_disks;
        if (!advancing) {
@@ -2450,10 +2414,11 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
        close(fd);
 
        /* We return the need_backup flag.  Caller will decide
-        * how much (a multiple of ->blocks) and will adjust
-        * suspend_{lo,hi} and suspend_point.
+        * how much - a multiple of ->backup_blocks up to *suspend_point
         */
-       return need_backup;
+       return advancing
+               ? (need_backup > info->reshape_progress)
+               : (need_backup < info->reshape_progress);
 }
 
 
@@ -2702,10 +2667,10 @@ static void validate(int afd, int bfd, unsigned long long offset)
        }
 }
 
-static int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
-                        unsigned long stripes,
-                        int *fds, unsigned long long *offsets,
-                        int dests, int *destfd, unsigned long long *destoffsets)
+int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
+                 struct supertype *st, unsigned long blocks,
+                 int *fds, unsigned long long *offsets,
+                 int dests, int *destfd, unsigned long long *destoffsets)
 {
        /* Monitor a reshape where backup is being performed using
         * 'native' mechanism - either to a backup file, or
@@ -2726,6 +2691,41 @@ static int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
        int data = reshape->before.data_disks;
        int disks = reshape->before.data_disks + reshape->parity;
        int chunk = sra->array.chunk_size;
+       struct mdinfo *sd;
+       unsigned long stripes;
+
+       /* set up the backup-super-block.  This requires the
+        * uuid from the array.
+        */
+       /* Find a superblock */
+       for (sd = sra->devs; sd; sd = sd->next) {
+               char *dn;
+               int devfd;
+               int ok;
+               if (sd->disk.state & (1<<MD_DISK_FAULTY))
+                       continue;
+               dn = map_dev(sd->disk.major, sd->disk.minor, 1);
+               devfd = dev_open(dn, O_RDONLY);
+               if (devfd < 0)
+                       continue;
+               ok = st->ss->load_super(st, devfd, NULL);
+               close(devfd);
+               if (ok >= 0)
+                       break;
+       }
+       if (!sd) {
+               fprintf(stderr, Name ": Cannot find a superblock\n");
+               return 0;
+       }
+
+       memset(&bsb, 0, 512);
+       memcpy(bsb.magic, "md_backup_data-1", 16);
+       st->ss->uuid_from_super(st, (int*)&bsb.set_uuid);
+       bsb.mtime = __cpu_to_le64(time(0));
+       bsb.devstart2 = blocks;
+
+       stripes = blocks / (sra->array.chunk_size/512) /
+               reshape->before.data_disks;
 
        if (posix_memalign((void**)&buf, 4096, disks * chunk))
                /* Don't start the 'reshape' */
@@ -2735,11 +2735,12 @@ static int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
                sysfs_set_num(sra, NULL, "sync_speed_min", 200000);
        }
 
-       array_size = sra->component_size * data;
        if (increasing) {
+               array_size = sra->component_size * reshape->after.data_disks;
                backup_point = sra->reshape_progress;
                suspend_point = 0;
        } else {
+               array_size = sra->component_size * reshape->before.data_disks;
                backup_point = array_size;
                suspend_point = array_size;
        }
@@ -2750,7 +2751,7 @@ static int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
                /* Want to return as soon the oldest backup slot can
                 * be released as that allows us to start backing up
                 * some more, providing suspend_point has been
-                * advanced, which it should have
+                * advanced, which it should have.
                 */
                if (increasing) {
                        wait_point = array_size;
@@ -2771,11 +2772,6 @@ static int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
                rv = progress_reshape(sra, reshape,
                                      backup_point, wait_point,
                                      &suspend_point, &reshape_completed);
-               if (rv < 0) {
-                       done = 1;
-                       break;
-               }
-
                /* external metadata would need to ping_monitor here */
                sra->reshape_progress = reshape_completed;
 
@@ -2798,19 +2794,43 @@ static int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
                                              destoffsets, 1);
                }
 
-               if (rv) {
+               if (rv < 0) {
+                       done = 1;
+                       break;
+               }
+
+               while (rv) {
                        unsigned long long offset;
-                       /* need to backup some space... */
+                       unsigned long actual_stripes;
+                       /* Need to backup some data.
+                        * If 'part' is not used and the desired
+                        * backup size is suspended, do a backup,
+                        * then consider the next part.
+                        */
                        /* Check that 'part' is unused */
                        if (part == 0 && __le64_to_cpu(bsb.length) != 0)
-                               abort(); /* BUG here */
+                               break;
                        if (part == 1 && __le64_to_cpu(bsb.length2) != 0)
-                               abort();
+                               break;
 
                        offset = backup_point / data;
-                       if (!increasing)
-                               offset -= stripes * (chunk/512);
-                       grow_backup(sra, offset, stripes,
+                       actual_stripes = stripes;
+                       if (increasing) {
+                               if (offset + actual_stripes * (chunk/512) >
+                                   sra->component_size)
+                                       actual_stripes = ((sra->component_size - offset)
+                                                         / (chunk/512));
+                               if (offset + actual_stripes * (chunk/512) >
+                                   suspend_point/data)
+                                       break;
+                       } else {
+                               if (offset < actual_stripes * (chunk/512))
+                                       actual_stripes = offset / (chunk/512);
+                               offset -= actual_stripes * (chunk/512);
+                               if (offset < suspend_point/data)
+                                       break;
+                       }
+                       grow_backup(sra, offset, actual_stripes,
                                    fds, offsets,
                                    disks, chunk, level, layout,
                                    dests, destfd, destoffsets,
@@ -2819,12 +2839,14 @@ static int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
                        /* record where 'part' is up to */
                        part = !part;
                        if (increasing)
-                               backup_point += stripes * (chunk/512) * data;
+                               backup_point += actual_stripes * (chunk/512) * data;
                        else
-                               backup_point -= stripes * (chunk/512) * data;
+                               backup_point -= actual_stripes * (chunk/512) * data;
                }
        }
 
+       /* FIXME maybe call progress_reshape one more time instead */
+       abort_reshape(sra); /* remove any remaining suspension */
        if (reshape->before.data_disks == reshape->after.data_disks)
                sysfs_set_num(sra, NULL, "sync_speed_min", speed);
        free(buf);