git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Grow.c
Consistently print program Name and __func__ in debug messages.
[thirdparty/mdadm.git] / Grow.c
diff --git a/Grow.c b/Grow.c
index 52af9bfd9c4be63834c926e9a13da6d5ab286ffa..f2879768ca70354f6f6707a511af0b1fd7319819 100644
--- a/Grow.c
+++ b/Grow.c
@@ -1,7 +1,7 @@
 /*
  * mdadm - manage Linux "md" devices aka RAID arrays.
  *
- * Copyright (C) 2001-2012 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
  *
  *
  *    This program is free software; you can redistribute it and/or modify
 #include       "mdadm.h"
 #include       "dlink.h"
 #include       <sys/mman.h>
+#include       <stddef.h>
+#include       <stdint.h>
+#include       <signal.h>
+#include       <sys/wait.h>
 
 #if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN)
 #error no endian defined
 #include       "md_u.h"
 #include       "md_p.h"
 
-#ifndef offsetof
-#define offsetof(t,f) ((size_t)&(((t*)0)->f))
-#endif
-
 int restore_backup(struct supertype *st,
                   struct mdinfo *content,
                   int working_disks,
                   int next_spare,
-                  char *backup_file,
+                  char **backup_filep,
                   int verbose)
 {
        int i;
@@ -47,10 +47,12 @@ int restore_backup(struct supertype *st,
        struct mdinfo *dev;
        int err;
        int disk_count = next_spare + working_disks;
+       char *backup_file = *backup_filep;
 
        dprintf("Called restore_backup()\n");
        fdlist = xmalloc(sizeof(int) * disk_count);
 
+       enable_fds(next_spare);
        for (i = 0; i < next_spare; i++)
                fdlist[i] = -1;
        for (dev = content->devs; dev; dev = dev->next) {
@@ -67,6 +69,11 @@ int restore_backup(struct supertype *st,
                        fdlist[next_spare++] = fd;
        }
 
+       if (!backup_file) {
+               backup_file = locate_backup(content->sys_name);
+               *backup_filep = backup_file;
+       }
+
        if (st->ss->external && st->ss->recover_backup)
                err = st->ss->recover_backup(st, content);
        else
@@ -382,7 +389,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
                                "with %s metadata\n", st->ss->name);
                        return 1;
                }
-               mdi = sysfs_read(fd, -1, GET_BITMAP_LOCATION);
+               mdi = sysfs_read(fd, NULL, GET_BITMAP_LOCATION);
                if (mdi)
                        offset_setable = 1;
                for (d=0; d< st->max_devs; d++) {
@@ -421,7 +428,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
                }
                if (offset_setable) {
                        st->ss->getinfo_super(st, mdi, NULL);
-                       sysfs_init(mdi, fd, -1);
+                       sysfs_init(mdi, fd, NULL);
                        rv = sysfs_set_num_signed(mdi, NULL, "bitmap/location",
                                                  mdi->bitmap_offset);
                } else {
@@ -533,13 +540,11 @@ static int check_idle(struct supertype *st)
        /* Check that all member arrays for this container, or the
         * container of this array, are idle
         */
-       int container_dev = (st->container_dev != NoMdDev
-                            ? st->container_dev : st->devnum);
-       char container[40];
+       char *container = (st->container_devnm[0]
+                          ? st->container_devnm : st->devnm);
        struct mdstat_ent *ent, *e;
        int is_idle = 1;
 
-       fmt_devname(container, container_dev);
        ent = mdstat_read(0, 0);
        for (e = ent ; e; e = e->next) {
                if (!is_container_member(e, container))
@@ -555,15 +560,12 @@ static int check_idle(struct supertype *st)
 
 static int freeze_container(struct supertype *st)
 {
-       int container_dev = (st->container_dev != NoMdDev
-                            ? st->container_dev : st->devnum);
-       char container[40];
+       char *container = (st->container_devnm[0]
+                          ? st->container_devnm : st->devnm);
 
        if (!check_idle(st))
                return -1;
 
-       fmt_devname(container, container_dev);
-
        if (block_monitor(container, 1)) {
                pr_err("failed to freeze container\n");
                return -2;
@@ -574,11 +576,8 @@ static int freeze_container(struct supertype *st)
 
 static void unfreeze_container(struct supertype *st)
 {
-       int container_dev = (st->container_dev != NoMdDev
-                            ? st->container_dev : st->devnum);
-       char container[40];
-
-       fmt_devname(container, container_dev);
+       char *container = (st->container_devnm[0]
+                          ? st->container_devnm : st->devnm);
 
        unblock_monitor(container, 1);
 }
@@ -594,7 +593,7 @@ static int freeze(struct supertype *st)
        if (st->ss->external)
                return freeze_container(st);
        else {
-               struct mdinfo *sra = sysfs_read(-1, st->devnum, GET_VERSION);
+               struct mdinfo *sra = sysfs_read(-1, st->devnm, GET_VERSION);
                int err;
                char buf[20];
 
@@ -616,10 +615,15 @@ static void unfreeze(struct supertype *st)
        if (st->ss->external)
                return unfreeze_container(st);
        else {
-               struct mdinfo *sra = sysfs_read(-1, st->devnum, GET_VERSION);
+               struct mdinfo *sra = sysfs_read(-1, st->devnm, GET_VERSION);
+               char buf[20];
 
-               if (sra)
+               if (sra &&
+                   sysfs_get_str(sra, NULL, "sync_action", buf, 20) > 0
+                   && strcmp(buf, "frozen\n") == 0) {
+                       printf("unfreeze\n");
                        sysfs_set_str(sra, NULL, "sync_action", "idle");
+               }
                sysfs_free(sra);
        }
 }
@@ -632,13 +636,9 @@ static void wait_reshape(struct mdinfo *sra)
        if (fd < 0)
                return;
 
-       while  (sysfs_fd_get_str(fd, action, 20) > 0 &&
-               strncmp(action, "reshape", 7) == 0) {
-               fd_set rfds;
-               FD_ZERO(&rfds);
-               FD_SET(fd, &rfds);
-               select(fd+1, NULL, NULL, &rfds, NULL);
-       }
+       while (sysfs_fd_get_str(fd, action, 20) > 0 &&
+              strncmp(action, "reshape", 7) == 0)
+               sysfs_wait(fd, NULL);
        close(fd);
 }
 
@@ -739,7 +739,8 @@ void abort_reshape(struct mdinfo *sra)
        sysfs_set_num(sra, NULL, "suspend_hi", 0);
        sysfs_set_num(sra, NULL, "suspend_lo", 0);
        sysfs_set_num(sra, NULL, "sync_min", 0);
-       sysfs_set_str(sra, NULL, "sync_max", "max");
+       // It isn't safe to reset sync_max as we aren't monitoring.
+       // Array really should be stopped at this point.
 }
 
 int remove_disks_for_takeover(struct supertype *st,
@@ -845,6 +846,7 @@ int reshape_prepare_fdlist(char *devname,
        int d = 0;
        struct mdinfo *sd;
 
+       enable_fds(nrdisks);
        for (d = 0; d <= nrdisks; d++)
                fdlist[d] = -1;
        d = raid_disks;
@@ -888,6 +890,7 @@ int reshape_open_backup_file(char *backup_file,
                             long blocks,
                             int *fdlist,
                             unsigned long long *offsets,
+                            char *sys_name,
                             int restart)
 {
        /* Return 1 on success, 0 on any form of failure */
@@ -935,18 +938,15 @@ int reshape_open_backup_file(char *backup_file,
                return 0;
        }
 
-       return 1;
-}
-
-unsigned long GCD(unsigned long a, unsigned long b)
-{
-       while (a != b) {
-               if (a < b)
-                       b -= a;
-               if (b < a)
-                       a -= b;
+       if (!restart && strncmp(backup_file, MAP_DIR, strlen(MAP_DIR)) != 0) {
+               char *bu = make_backup(sys_name);
+               if (symlink(backup_file, bu))
+                       pr_err("Recording backup file in " MAP_DIR "failed: %s\n",
+                              strerror(errno));
+               free(bu);
        }
-       return a;
+
+       return 1;
 }
 
 unsigned long compute_backup_blocks(int nchunk, int ochunk,
@@ -968,7 +968,7 @@ unsigned long compute_backup_blocks(int nchunk, int ochunk,
        return blocks;
 }
 
-char *analyse_change(struct mdinfo *info, struct reshape *re)
+char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
 {
        /* Based on the current array state in info->array and
         * the changes in info->new_* etc, determine:
@@ -993,6 +993,8 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
         */
        int delta_parity = 0;
 
+       memset(re, 0, sizeof(*re));
+
        /* If a new level not explicitly given, we assume no-change */
        if (info->new_level == UnSet)
                info->new_level = info->array.level;
@@ -1007,9 +1009,16 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                        /* chunk size is meaningful, must divide component_size
                         * evenly
                         */
-                       if (info->component_size % (info->new_chunk/512))
-                               return "New chunk size does not"
-                                       " divide component size";
+                       if (info->component_size % (info->new_chunk/512)) {
+                               unsigned long long shrink = info->component_size;
+                               shrink &= ~(unsigned long long)(info->new_chunk/512-1);
+                               pr_err("New chunk size (%dK) does not evenly divide device size (%lluk)\n",
+                                      info->new_chunk/1024, info->component_size/2);
+                               pr_err("After shrinking any filesystem, \"mdadm --grow %s --size %llu\"\n",
+                                      devname, shrink/2);
+                               pr_err("will shrink the array so the given chunk size would work.\n");
+                               return "";
+                       }
                        break;
                default:
                        return "chunk size not meaningful for this level";
@@ -1019,7 +1028,12 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
 
        switch (info->array.level) {
        default:
-               return "Cannot understand this RAID level";
+               return "No reshape is possibly for this RAID level";
+       case LEVEL_LINEAR:
+               if (info->delta_disks != UnSet)
+                       return "Only --add is supported for LINEAR, setting --raid-disks is not needed";
+               else
+                       return "Only --add is supported for LINEAR, other --grow options are not meaningful";
        case 1:
                /* RAID1 can convert to RAID1 with different disks, or
                 * raid5 with 2 disks, or
@@ -1037,9 +1051,6 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                        re->level = 0;
                        re->before.data_disks = 1;
                        re->after.data_disks = 1;
-                       re->before.layout = 0;
-                       re->backup_blocks = 0;
-                       re->parity = 0;
                        return NULL;
                }
                if (info->new_level == 1) {
@@ -1047,8 +1058,6 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                                /* Don't know what to do */
                                return "no change requested for Growing RAID1";
                        re->level = 1;
-                       re->backup_blocks = 0;
-                       re->parity = 0;
                        return NULL;
                }
                if (info->array.raid_disks == 2 &&
@@ -1101,11 +1110,8 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
 
                        /* looks good */
                        re->level = 0;
-                       re->parity = 0;
                        re->before.data_disks = new_disks;
                        re->after.data_disks = re->before.data_disks;
-                       re->before.layout = 0;
-                       re->backup_blocks = 0;
                        return NULL;
 
                case 10:
@@ -1138,24 +1144,25 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                        new_chunk = info->new_chunk * far;
 
                        re->level = 10;
-                       re->parity = 0;
                        re->before.layout = info->array.layout;
                        re->before.data_disks = info->array.raid_disks;
                        re->after.layout = info->new_layout;
                        re->after.data_disks = new_disks;
-                       /* For RAID10 we don't do backup, and there is
-                        * no need to synchronise stripes on both
+                       /* For RAID10 we don't do backup but do allow reshape,
+                        * so set backup_blocks to INVALID_SECTORS rather than
+                        * zero.
+                        * And there is no need to synchronise stripes on both
                         * 'old' and  'new'.  So the important
                         * number is the minimum data_offset difference
                         * which is the larger of (offset copies * chunk).
                         */
-
-                       re->backup_blocks = max(old_chunk, new_chunk) / 512;
+                       re->backup_blocks = INVALID_SECTORS;
+                       re->min_offset_change = max(old_chunk, new_chunk) / 512;
                        if (new_disks < re->before.data_disks &&
-                           info->space_after < re->backup_blocks)
+                           info->space_after < re->min_offset_change)
                                /* Reduce component size by one chunk */
                                re->new_size = (info->component_size -
-                                               re->backup_blocks);
+                                               re->min_offset_change);
                        else
                                re->new_size = info->component_size;
                        re->new_size = re->new_size * new_disks / copies;
@@ -1194,12 +1201,10 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                                return "Cannot change chunk-size with RAID0->RAID10";
                        /* looks good */
                        re->level = 10;
-                       re->parity = 0;
                        re->before.data_disks = (info->array.raid_disks +
                                                 info->delta_disks);
                        re->after.data_disks = re->before.data_disks;
                        re->before.layout = info->new_layout;
-                       re->backup_blocks = 0;
                        return NULL;
                }
 
@@ -1217,11 +1222,15 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                        delta_parity = 1;
                        re->level = 5;
                        re->before.layout = ALGORITHM_PARITY_N;
+                       if (info->new_layout == UnSet)
+                               info->new_layout = map_name(r5layout, "default");
                        break;
                case 6:
                        delta_parity = 2;
                        re->level = 6;
                        re->before.layout = ALGORITHM_PARITY_N;
+                       if (info->new_layout == UnSet)
+                               info->new_layout = map_name(r6layout, "default");
                        break;
                default:
                        return "Impossible level change requested";
@@ -1281,7 +1290,6 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                                return "Cannot set raid_disk when "
                                        "converting RAID5->RAID1";
                        re->level = 1;
-                       re->backup_blocks = 0;
                        info->new_chunk = 0;
                        return NULL;
                default:
@@ -1321,6 +1329,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
 
                switch (re->level) {
                case 4:
+                       re->before.layout = 0;
                        re->after.layout = 0;
                        break;
                case 5:
@@ -1423,17 +1432,20 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
        /* So we have a restripe operation, we need to calculate the number
         * of blocks per reshape operation.
         */
+       re->new_size = info->component_size * re->before.data_disks;
        if (info->new_chunk == 0)
                info->new_chunk = info->array.chunk_size;
        if (re->after.data_disks == re->before.data_disks &&
            re->after.layout == re->before.layout &&
            info->new_chunk == info->array.chunk_size) {
-               /* Nothing to change */
+               /* Nothing to change, can change level immediately. */
+               re->level = info->new_level;
                re->backup_blocks = 0;
                return NULL;
        }
        if (re->after.data_disks == 1 && re->before.data_disks == 1) {
                /* chunk and layout changes make no difference */
+               re->level = info->new_level;
                re->backup_blocks = 0;
                return NULL;
        }
@@ -1450,6 +1462,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                info->new_chunk, info->array.chunk_size,
                re->after.data_disks,
                re->before.data_disks);
+       re->min_offset_change = re->backup_blocks / re->before.data_disks;
 
        re->new_size = info->component_size * re->after.data_disks;
        return NULL;
@@ -1483,8 +1496,8 @@ static int set_array_size(struct supertype *st, struct mdinfo *sra,
                                ret_val = 0;
                                dprintf("Array size changed");
                        }
-                       dprintf(" from %llu to %llu.\n",
-                               current_size, new_size);
+                       dprintf_cont(" from %llu to %llu.\n",
+                                    current_size, new_size);
                }
                sysfs_free(info);
        } else
@@ -1504,8 +1517,8 @@ static int reshape_container(char *container, char *devname,
                             struct supertype *st,
                             struct mdinfo *info,
                             int force,
-                            char *backup_file,
-                            int verbose, int restart, int freeze_reshape);
+                            char *backup_file, int verbose,
+                            int forked, int restart, int freeze_reshape);
 
 int Grow_reshape(char *devname, int fd,
                 struct mddev_dev *devlist,
@@ -1537,7 +1550,6 @@ int Grow_reshape(char *devname, int fd,
        int frozen;
        int changed = 0;
        char *container = NULL;
-       char container_buf[20];
        int cfd = -1;
 
        struct mddev_dev *dv;
@@ -1546,13 +1558,13 @@ int Grow_reshape(char *devname, int fd,
        struct mdinfo info;
        struct mdinfo *sra;
 
-
        if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
-               fprintf(stderr, Name ": %s is not an active md array - aborting\n",
+               pr_err("%s is not an active md array - aborting\n",
                        devname);
                return 1;
        }
-       if (data_offset != INVALID_SECTORS && array.level != 10) {
+       if (data_offset != INVALID_SECTORS && array.level != 10
+           && (array.level < 4 || array.level > 6)) {
                pr_err("--grow --data-offset not yet supported\n");
                return 1;
        }
@@ -1590,16 +1602,15 @@ int Grow_reshape(char *devname, int fd,
         * pre-requisite spare devices (mdmon owns final validation)
         */
        if (st->ss->external) {
-               int container_dev;
                int rv;
 
                if (subarray) {
-                       container_dev = st->container_dev;
-                       cfd = open_dev_excl(st->container_dev);
+                       container = st->container_devnm;
+                       cfd = open_dev_excl(st->container_devnm);
                } else {
-                       container_dev = st->devnum;
+                       container = st->devnm;
                        close(fd);
-                       cfd = open_dev_excl(st->devnum);
+                       cfd = open_dev_excl(st->devnm);
                        fd = cfd;
                }
                if (cfd < 0) {
@@ -1609,9 +1620,6 @@ int Grow_reshape(char *devname, int fd,
                        return 1;
                }
 
-               fmt_devname(container_buf, container_dev);
-               container = container_buf;
-
                rv = st->ss->load_container(st, cfd, NULL);
 
                if (rv) {
@@ -1642,7 +1650,7 @@ int Grow_reshape(char *devname, int fd,
                                        pr_err("cannot reshape arrays in"
                                               " container with unsupported"
                                               " metadata: %s(%s)\n",
-                                              devname, container_buf);
+                                              devname, container);
                                        sysfs_free(cc);
                                        free(subarray);
                                        return 1;
@@ -1650,7 +1658,7 @@ int Grow_reshape(char *devname, int fd,
                        }
                        sysfs_free(cc);
                }
-               if (mdmon_running(container_dev))
+               if (mdmon_running(container))
                        st->update_tail = &st->updates;
        }
 
@@ -1669,7 +1677,7 @@ int Grow_reshape(char *devname, int fd,
                return 1;
        }
 
-       sra = sysfs_read(fd, 0, GET_LEVEL | GET_DISKS | GET_DEVS
+       sra = sysfs_read(fd, NULL, GET_LEVEL | GET_DISKS | GET_DEVS
                         | GET_STATE | GET_VERSION);
        if (sra) {
                if (st->ss->external && subarray == NULL) {
@@ -1703,6 +1711,12 @@ int Grow_reshape(char *devname, int fd,
                if (orig_size == 0)
                        orig_size = (unsigned) array.size;
 
+               if (orig_size == 0) {
+                       pr_err("Cannot set device size in this type of array.\n");
+                       rv = 1;
+                       goto release;
+               }
+
                if (reshape_super(st, s->size, UnSet, UnSet, 0, 0, UnSet, NULL,
                                  devname, APPLY_METADATA_CHANGES, c->verbose > 0)) {
                        rv = 1;
@@ -1793,10 +1807,10 @@ int Grow_reshape(char *devname, int fd,
                        }
                        /* make sure mdmon is
                         * aware of the new level */
-                       if (!mdmon_running(st->container_dev))
-                               start_mdmon(st->container_dev);
+                       if (!mdmon_running(st->container_devnm))
+                               start_mdmon(st->container_devnm);
                        ping_monitor(container);
-                       if (mdmon_running(st->container_dev) &&
+                       if (mdmon_running(st->container_devnm) &&
                                        st->update_tail == NULL)
                                st->update_tail = &st->updates;
                }
@@ -1804,7 +1818,7 @@ int Grow_reshape(char *devname, int fd,
                if (s->size == MAX_SIZE)
                        s->size = 0;
                array.size = s->size;
-               if ((unsigned)array.size != s->size) {
+               if (s->size & ~INT32_MAX) {
                        /* got truncated to 32bit, write to
                         * component_size instead
                         */
@@ -1884,6 +1898,7 @@ size_change_error:
        if ((s->level == UnSet || s->level == array.level) &&
            (s->layout_str == NULL) &&
            (s->chunk == 0 || s->chunk == array.chunk_size) &&
+           data_offset == INVALID_SECTORS &&
            (s->raiddisks == 0 || s->raiddisks == array.raid_disks)) {
                /* Nothing more to do */
                if (!changed && c->verbose >= 0)
@@ -1904,7 +1919,7 @@ size_change_error:
                int err;
                err = remove_disks_for_takeover(st, sra, array.layout);
                if (err) {
-                       dprintf(Name": Array cannot be reshaped\n");
+                       dprintf("Array cannot be reshaped\n");
                        if (cfd > -1)
                                close(cfd);
                        rv = 1;
@@ -1920,7 +1935,7 @@ size_change_error:
 
        memset(&info, 0, sizeof(info));
        info.array = array;
-       sysfs_init(&info, fd, NoMdDev);
+       sysfs_init(&info, fd, NULL);
        strcpy(info.text_version, sra->text_version);
        info.component_size = s->size*2;
        info.new_level = s->level;
@@ -2055,7 +2070,7 @@ size_change_error:
                 * performed at the level of the container
                 */
                rv = reshape_container(container, devname, -1, st, &info,
-                                      c->force, c->backup_file, c->verbose, 0, 0);
+                                      c->force, c->backup_file, c->verbose, 0, 0, 0);
                frozen = 0;
        } else {
                /* get spare devices from external metadata
@@ -2118,7 +2133,7 @@ static int verify_reshape_position(struct mdinfo *info, int level)
                char *ep;
                unsigned long long position = strtoull(buf, &ep, 0);
 
-               dprintf(Name": Read sync_max sysfs entry is: %s\n", buf);
+               dprintf("Read sync_max sysfs entry is: %s\n", buf);
                if (!(ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))) {
                        position *= get_data_disks(level,
                                                   info->new_layout,
@@ -2152,60 +2167,63 @@ static int verify_reshape_position(struct mdinfo *info, int level)
        return ret_val;
 }
 
-static int raid10_reshape(char *container, int fd, char *devname,
-                         struct supertype *st, struct mdinfo *info,
-                         struct reshape *reshape,
-                         unsigned long long data_offset,
-                         int force, int verbose)
+static unsigned long long choose_offset(unsigned long long lo,
+                                       unsigned long long hi,
+                                       unsigned long long min,
+                                       unsigned long long max)
 {
-       /* Changing raid_disks, layout, chunksize or possibly
-        * just data_offset for a RAID10.
-        * We must always change data_offset.  We change by at least
-        * ->backup_blocks which is the largest of the old and new
-        * chunk sizes.
-        * If raid_disks is increasing, then data_offset must decrease
-        * by at least this copy size.
-        * If raid_disks is unchanged, data_offset must increase or
-        * decrease by at least backup_blocks but preferably by much more.
-        * We choose half of the available space.
-        * If raid_disks is decreasing, data_offset must increase by
-        * at least backup_blocks.  To allow of this, component_size
-        * must be decreased by the same amount.
+       /* Choose a new offset between hi and lo.
+        * It must be between min and max, but
+        * we would prefer something near the middle of hi/lo, and also
+        * prefer to be aligned to a big power of 2.
         *
-        * So we calculate the required minimum and direction, possibly
-        * reduce the component_size, then iterate through the devices
-        * and set the new_data_offset.
-        * If that all works, we set chunk_size, layout, raid_disks, and start
-        * 'reshape'
+        * So we start with the middle, then for each bit,
+        * starting at '1' and increasing, if it is set, we either
+        * add it or subtract it if possible, preferring the option
+        * which is furthest from the boundary.
+        *
+        * We stop once we get a 1MB alignment. As units are in sectors,
+        * 1MB = 2*1024 sectors.
         */
-       struct mdinfo *sra, *sd;
-       unsigned long long min;
-       int dir = 0;
-       int err = 0;
+       unsigned long long choice = (lo + hi) / 2;
+       unsigned long long bit = 1;
 
-       sra = sysfs_read(fd, 0,
-                        GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK
-               );
-       if (!sra) {
-               fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n",
-                       devname);
-               goto release;
+       for (bit = 1; bit < 2*1024; bit = bit << 1) {
+               unsigned long long bigger, smaller;
+               if (! (bit & choice))
+                       continue;
+               bigger = choice + bit;
+               smaller = choice - bit;
+               if (bigger > max && smaller < min)
+                       break;
+               if (bigger > max)
+                       choice = smaller;
+               else if (smaller < min)
+                       choice = bigger;
+               else if (hi - bigger > smaller - lo)
+                       choice = bigger;
+               else
+                       choice = smaller;
        }
-       min = reshape->backup_blocks;
+       return choice;
+}
 
-       if (info->delta_disks)
-               sysfs_set_str(sra, NULL, "reshape_direction",
-                             info->delta_disks < 0 ? "backwards" : "forwards");
-       if (info->delta_disks < 0 &&
-           info->space_after < reshape->backup_blocks) {
-               int rv = sysfs_set_num(sra, NULL, "component_size",
-                                      (sra->component_size -
-                                       reshape->backup_blocks)/2);
-               if (rv) {
-                       fprintf(stderr, Name ": cannot reduce component size\n");
-                       goto release;
-               }
-       }
+static int set_new_data_offset(struct mdinfo *sra, struct supertype *st,
+                              char *devname, int delta_disks,
+                              unsigned long long data_offset,
+                              unsigned long long min,
+                              int can_fallback)
+{
+       struct mdinfo *sd;
+       int dir = 0;
+       int err = 0;
+       unsigned long long before, after;
+
+       /* Need to find min space before and after so same is used
+        * on all devices
+        */
+       before = UINT64_MAX;
+       after = UINT64_MAX;
        for (sd = sra->devs; sd; sd = sd->next) {
                char *dn;
                int dfd;
@@ -2218,10 +2236,8 @@ static int raid10_reshape(char *container, int fd, char *devname,
                dn = map_dev(sd->disk.major, sd->disk.minor, 0);
                dfd = dev_open(dn, O_RDONLY);
                if (dfd < 0) {
-                       fprintf(stderr,
-                               Name ": %s: cannot open component %s\n",
+                       pr_err("%s: cannot open component %s\n",
                                devname, dn ? dn : "-unknown-");
-                       rv = -1;
                        goto release;
                }
                st2 = dup_super(st);
@@ -2229,123 +2245,249 @@ static int raid10_reshape(char *container, int fd, char *devname,
                close(dfd);
                if (rv) {
                        free(st2);
-                       fprintf(stderr, ": %s: cannot get superblock from %s\n",
+                       pr_err("%s: cannot get superblock from %s\n",
                                devname, dn);
                        goto release;
                }
                st2->ss->getinfo_super(st2, &info2, NULL);
                st2->ss->free_super(st2);
                free(st2);
-               if (info->delta_disks < 0) {
+               if (info2.space_before == 0 &&
+                   info2.space_after == 0) {
+                       /* Metadata doesn't support data_offset changes */
+                       pr_err("%s: Metadata version doesn't support"
+                              " data_offset changes\n", devname);
+                       goto fallback;
+               }
+               if (before > info2.space_before)
+                       before = info2.space_before;
+               if (after > info2.space_after)
+                       after = info2.space_after;
+
+               if (data_offset != INVALID_SECTORS) {
+                       if (dir == 0) {
+                               if (info2.data_offset == data_offset) {
+                                       pr_err("%s: already has that data_offset\n",
+                                              dn);
+                                       goto release;
+                               }
+                               if (data_offset < info2.data_offset)
+                                       dir = -1;
+                               else
+                                       dir = 1;
+                       } else if ((data_offset <= info2.data_offset && dir == 1) ||
+                                  (data_offset >= info2.data_offset && dir == -1)) {
+                               pr_err("%s: differing data offsets on devices make this --data-offset setting impossible\n",
+                                       dn);
+                               goto release;
+                       }
+               }
+       }
+       if (before == UINT64_MAX)
+               /* impossible really, there must be no devices */
+               return 1;
+
+       for (sd = sra->devs; sd; sd = sd->next) {
+               char *dn = map_dev(sd->disk.major, sd->disk.minor, 0);
+               unsigned long long new_data_offset;
+
+               if (sd->disk.state & (1<<MD_DISK_FAULTY))
+                       continue;
+               if (delta_disks < 0) {
                        /* Don't need any space as array is shrinking
                         * just move data_offset up by min
                         */
-                       if (data_offset == 1)
-                               info2.new_data_offset = info2.data_offset + min;
+                       if (data_offset == INVALID_SECTORS)
+                               new_data_offset = sd->data_offset + min;
                        else {
-                               if ((unsigned long long)data_offset
-                                   < info2.data_offset + min) {
-                                       fprintf(stderr, Name ": --data-offset too small for %s\n",
+                               if (data_offset < sd->data_offset + min) {
+                                       pr_err("--data-offset too small for %s\n",
                                                dn);
                                        goto release;
                                }
-                               info2.new_data_offset = data_offset;
+                               new_data_offset = data_offset;
                        }
-               } else if (info->delta_disks > 0) {
+               } else if (delta_disks > 0) {
                        /* need space before */
-                       if (info2.space_before < min) {
-                               fprintf(stderr, Name ": Insufficient head-space for reshape on %s\n",
+                       if (before < min) {
+                               if (can_fallback)
+                                       goto fallback;
+                               pr_err("Insufficient head-space for reshape on %s\n",
                                        dn);
                                goto release;
                        }
-                       if (data_offset == 1)
-                               info2.new_data_offset = info2.data_offset - min;
+                       if (data_offset == INVALID_SECTORS)
+                               new_data_offset = sd->data_offset - min;
                        else {
-                               if ((unsigned long long)data_offset
-                                   > info2.data_offset - min) {
-                                       fprintf(stderr, Name ": --data-offset too large for %s\n",
+                               if (data_offset > sd->data_offset - min) {
+                                       pr_err("--data-offset too large for %s\n",
                                                dn);
                                        goto release;
                                }
-                               info2.new_data_offset = data_offset;
+                               new_data_offset = data_offset;
                        }
                } else {
                        if (dir == 0) {
-                               /* can move up or down. 'data_offset'
-                                * might guide us, otherwise choose
-                                * direction with most space
+                               /* can move up or down.  If 'data_offset'
+                                * was set we would have already decided,
+                                * so just choose direction with most space.
                                 */
-                               if (data_offset == 1) {
-                                       if (info2.space_before > info2.space_after)
-                                               dir = -1;
-                                       else
-                                               dir = 1;
-                               } else if (data_offset < info2.data_offset)
+                               if (before > after)
                                        dir = -1;
                                else
                                        dir = 1;
-                               sysfs_set_str(sra, NULL, "reshape_direction",
-                                             dir == 1 ? "backwards" : "forwards");
                        }
-                       switch (dir) {
-                       case 1: /* Increase data offset */
-                               if (info2.space_after < min) {
-                                       fprintf(stderr, Name ": Insufficient tail-space for reshape on %s\n",
+                       sysfs_set_str(sra, NULL, "reshape_direction",
+                                     dir == 1 ? "backwards" : "forwards");
+                       if (dir > 0) {
+                               /* Increase data offset */
+                               if (after < min) {
+                                       if (can_fallback)
+                                               goto fallback;
+                                       pr_err("Insufficient tail-space for reshape on %s\n",
                                                dn);
                                        goto release;
                                }
-                               if (data_offset != 1 &&
-                                   data_offset < info2.data_offset + min) {
-                                       fprintf(stderr, Name ": --data-offset too small on %s\n",
+                               if (data_offset != INVALID_SECTORS &&
+                                   data_offset < sd->data_offset + min) {
+                                       pr_err("--data-offset too small on %s\n",
                                                dn);
                                        goto release;
                                }
-                               if (data_offset != 1)
-                                       info2.new_data_offset = data_offset;
-                               else {
-                                       unsigned long long off =
-                                               info2.space_after / 2;
-                                       off &= ~7ULL;
-                                       if (off < min)
-                                               off = min;
-                                       info2.new_data_offset =
-                                               info2.data_offset + off;
-                               }
-                               break;
-                       case -1: /* Decrease data offset */
-                               if (info2.space_before < min) {
-                                       fprintf(stderr, Name ": insufficient head-room on %s\n",
+                               if (data_offset != INVALID_SECTORS)
+                                       new_data_offset = data_offset;
+                               else
+                                       new_data_offset = choose_offset(sd->data_offset,
+                                                                       sd->data_offset + after,
+                                                                       sd->data_offset + min,
+                                                                       sd->data_offset + after);
+                       } else {
+                               /* Decrease data offset */
+                               if (before < min) {
+                                       if (can_fallback)
+                                               goto fallback;
+                                       pr_err("insufficient head-room on %s\n",
                                                dn);
                                        goto release;
                                }
-                               if (data_offset != 1 &&
-                                   data_offset < info2.data_offset - min) {
-                                       fprintf(stderr, Name ": --data-offset too small on %s\n",
+                               if (data_offset != INVALID_SECTORS &&
+                                   data_offset < sd->data_offset - min) {
+                                       pr_err("--data-offset too small on %s\n",
                                                dn);
                                        goto release;
                                }
-                               if (data_offset != 1)
-                                       info2.new_data_offset = data_offset;
-                               else {
-                                       unsigned long long off =
-                                               info2.space_before / 2;
-                                       off &= ~7ULL;
-                                       if (off < min)
-                                               off = min;
-                                       info2.new_data_offset =
-                                               info2.data_offset - off;
-                               }
-                               break;
+                               if (data_offset != INVALID_SECTORS)
+                                       new_data_offset = data_offset;
+                               else
+                                       new_data_offset = choose_offset(sd->data_offset - before,
+                                                                       sd->data_offset,
+                                                                       sd->data_offset - before,
+                                                                       sd->data_offset - min);
                        }
                }
-               if (sysfs_set_num(sra, sd, "new_offset",
-                                 info2.new_data_offset) < 0) {
-                       err = errno;
-                       fprintf(stderr, Name ": Cannot set new_offset for %s\n",
+               err = sysfs_set_num(sra, sd, "new_offset", new_data_offset);
+               if (err < 0 && errno == E2BIG) {
+                       /* try again after increasing data size to max */
+                       err = sysfs_set_num(sra, sd, "size", 0);
+                       if (err < 0 && errno == EINVAL &&
+                           !(sd->disk.state & (1<<MD_DISK_SYNC))) {
+                               /* some kernels have a bug where you cannot
+                                * use '0' on spare devices. */
+                               sysfs_set_num(sra, sd, "size",
+                                             (sra->component_size + after)/2);
+                       }
+                       err = sysfs_set_num(sra, sd, "new_offset",
+                                           new_data_offset);
+               }
+               if (err < 0) {
+                       if (errno == E2BIG && data_offset != INVALID_SECTORS) {
+                               pr_err("data-offset is too big for %s\n",
+                                      dn);
+                               goto release;
+                       }
+                       if (sd == sra->devs &&
+                           (errno == ENOENT || errno == E2BIG))
+                               /* Early kernel, no 'new_offset' file,
+                                * or kernel doesn't like us.
+                                * For RAID5/6 this is not fatal
+                                */
+                               return 1;
+                       pr_err("Cannot set new_offset for %s\n",
                                dn);
                        break;
                }
        }
+       return err;
+release:
+       return -1;
+fallback:
+       /* Just use a backup file */
+       return 1;
+}
+
+static int raid10_reshape(char *container, int fd, char *devname,
+                         struct supertype *st, struct mdinfo *info,
+                         struct reshape *reshape,
+                         unsigned long long data_offset,
+                         int force, int verbose)
+{
+       /* Changing raid_disks, layout, chunksize or possibly
+        * just data_offset for a RAID10.
+        * We must always change data_offset.  We change by at least
+        * ->min_offset_change which is the larger of the old and new
+        * chunk sizes.
+        * If raid_disks is increasing, then data_offset must decrease
+        * by at least this copy size.
+        * If raid_disks is unchanged, data_offset must increase or
+        * decrease by at least min_offset_change but preferably by much more.
+        * We choose half of the available space.
+        * If raid_disks is decreasing, data_offset must increase by
+        * at least min_offset_change.  To allow for this, component_size
+        * must be decreased by the same amount.
+        *
+        * So we calculate the required minimum and direction, possibly
+        * reduce the component_size, then iterate through the devices
+        * and set the new_data_offset.
+        * If that all works, we set chunk_size, layout, raid_disks, and start
+        * 'reshape'
+        */
+       struct mdinfo *sra;
+       unsigned long long min;
+       int err = 0;
+
+       sra = sysfs_read(fd, NULL,
+                        GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK
+               );
+       if (!sra) {
+               pr_err("%s: Cannot get array details from sysfs\n",
+                       devname);
+               goto release;
+       }
+       min = reshape->min_offset_change;
+
+       if (info->delta_disks)
+               sysfs_set_str(sra, NULL, "reshape_direction",
+                             info->delta_disks < 0 ? "backwards" : "forwards");
+       if (info->delta_disks < 0 &&
+           info->space_after < min) {
+               int rv = sysfs_set_num(sra, NULL, "component_size",
+                                      (sra->component_size -
+                                       min)/2);
+               if (rv) {
+                       pr_err("cannot reduce component size\n");
+                       goto release;
+               }
+       }
+       err = set_new_data_offset(sra, st, devname, info->delta_disks, data_offset,
+                                 min, 0);
+       if (err == 1) {
+               pr_err("Cannot set new_data_offset: RAID10 reshape not\n");
+               cont_err("supported on this kernel\n");
+               err = -1;
+       }
+       if (err < 0)
+               goto release;
+
        if (!err && sysfs_set_num(sra, NULL, "chunk_size", info->new_chunk) < 0)
                err = errno;
        if (!err && sysfs_set_num(sra, NULL, "layout", reshape->after.layout) < 0)
@@ -2356,14 +2498,13 @@ static int raid10_reshape(char *container, int fd, char *devname,
        if (!err && sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0)
                err = errno;
        if (err) {
-               fprintf(stderr, Name ": Cannot set array shape for %s\n",
-                       devname);
-                       if (err == EBUSY &&
-                           (info->array.state & (1<<MD_SB_BITMAP_PRESENT)))
-                               fprintf(stderr,
-                                       "       Bitmap must be removed before"
-                                       " shape can be changed\n");
-                       goto release;
+               pr_err("Cannot set array shape for %s\n",
+                      devname);
+               if (err == EBUSY &&
+                   (info->array.state & (1<<MD_SB_BITMAP_PRESENT)))
+                       cont_err("       Bitmap must be removed before"
+                                " shape can be changed\n");
+               goto release;
        }
        sysfs_free(sra);
        return 0;
@@ -2379,7 +2520,7 @@ static void get_space_after(int fd, struct supertype *st, struct mdinfo *info)
        unsigned long long min_space_before = 0, min_space_after = 0;
        int first = 1;
 
-       sra = sysfs_read(fd, 0, GET_DEVS);
+       sra = sysfs_read(fd, NULL, GET_DEVS);
        if (!sra)
                return;
        for (sd = sra->devs; sd; sd = sd->next) {
@@ -2419,6 +2560,246 @@ static void get_space_after(int fd, struct supertype *st, struct mdinfo *info)
        sysfs_free(sra);
 }
 
+static void update_cache_size(char *container, struct mdinfo *sra,
+                             struct mdinfo *info,
+                             int disks, unsigned long long blocks)
+{
+       /* Check that the internal stripe cache is
+        * large enough, or it won't work.
+        * It must hold at least 4 stripes of the larger
+        * chunk size
+        */
+       unsigned long cache;
+       cache = max(info->array.chunk_size, info->new_chunk);
+       cache *= 4; /* 4 stripes minimum */
+       cache /= 512; /* convert to sectors */
+       /* make sure there is room for 'blocks' with a bit to spare */
+       if (cache < 16 + blocks / disks)
+               cache = 16 + blocks / disks;
+       cache /= (4096/512); /* Convert from sectors to pages */
+
+       if (sra->cache_size < cache)
+               subarray_set_num(container, sra, "stripe_cache_size",
+                                cache+1);
+}
+
+static int impose_reshape(struct mdinfo *sra,
+                         struct mdinfo *info,
+                         struct supertype *st,
+                         int fd,
+                         int restart,
+                         char *devname, char *container,
+                         struct reshape *reshape)
+{
+       struct mdu_array_info_s array;
+
+       sra->new_chunk = info->new_chunk;
+
+       if (restart) {
+               /* For external metadata, a checkpoint saved by mdmon can be
+                * lost or missed (e.g. due to a crash).  Check whether md is
+                * restarting further along than the metadata points to.
+                * If so, the metadata information is obsolete.
+                */
+               if (st->ss->external)
+                       verify_reshape_position(info, reshape->level);
+               sra->reshape_progress = info->reshape_progress;
+       } else {
+               sra->reshape_progress = 0;
+               if (reshape->after.data_disks < reshape->before.data_disks)
+                       /* start from the end of the new array */
+                       sra->reshape_progress = (sra->component_size
+                                                * reshape->after.data_disks);
+       }
+
+       ioctl(fd, GET_ARRAY_INFO, &array);
+       if (info->array.chunk_size == info->new_chunk &&
+           reshape->before.layout == reshape->after.layout &&
+           st->ss->external == 0) {
+               /* use SET_ARRAY_INFO but only if reshape hasn't started */
+               array.raid_disks = reshape->after.data_disks + reshape->parity;
+               if (!restart &&
+                   ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+                       int err = errno;
+
+                       pr_err("Cannot set device shape for %s: %s\n",
+                              devname, strerror(errno));
+
+                       if (err == EBUSY &&
+                           (array.state & (1<<MD_SB_BITMAP_PRESENT)))
+                               cont_err("Bitmap must be removed before"
+                                        " shape can be changed\n");
+
+                       goto release;
+               }
+       } else if (!restart) {
+               /* set them all just in case some old 'new_*' value
+                * persists from some earlier problem.
+                */
+               int err = 0;
+               if (sysfs_set_num(sra, NULL, "chunk_size", info->new_chunk) < 0)
+                       err = errno;
+               if (!err && sysfs_set_num(sra, NULL, "layout",
+                                         reshape->after.layout) < 0)
+                       err = errno;
+               if (!err && subarray_set_num(container, sra, "raid_disks",
+                                            reshape->after.data_disks +
+                                            reshape->parity) < 0)
+                       err = errno;
+               if (err) {
+                       pr_err("Cannot set device shape for %s\n",
+                               devname);
+
+                       if (err == EBUSY &&
+                           (array.state & (1<<MD_SB_BITMAP_PRESENT)))
+                               cont_err("Bitmap must be removed before"
+                                        " shape can be changed\n");
+                       goto release;
+               }
+       }
+       return 0;
+release:
+       return -1;
+}
+
+static int impose_level(int fd, int level, char *devname, int verbose)
+{
+       char *c;
+       struct mdu_array_info_s array;
+       struct mdinfo info;
+       sysfs_init(&info, fd, NULL);
+
+       ioctl(fd, GET_ARRAY_INFO, &array);
+       if (level == 0 &&
+           (array.level >= 4 && array.level <= 6)) {
+               /* To convert to RAID0 we need to fail and
+                * remove any non-data devices. */
+               int found = 0;
+               int d;
+               int data_disks = array.raid_disks - 1;
+               if (array.level == 6)
+                       data_disks -= 1;
+               if (array.level == 5 &&
+                   array.layout != ALGORITHM_PARITY_N)
+                       return -1;
+               if (array.level == 6 &&
+                   array.layout != ALGORITHM_PARITY_N_6)
+                       return -1;
+               sysfs_set_str(&info, NULL,"sync_action", "idle");
+               /* First remove any spares so no recovery starts */
+               for (d = 0, found = 0;
+                    d < MAX_DISKS && found < array.nr_disks;
+                    d++) {
+                       mdu_disk_info_t disk;
+                       disk.number = d;
+                       if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+                               continue;
+                       if (disk.major == 0 && disk.minor == 0)
+                               continue;
+                       found++;
+                       if ((disk.state & (1 << MD_DISK_ACTIVE))
+                           && disk.raid_disk < data_disks)
+                               /* keep this */
+                               continue;
+                       ioctl(fd, HOT_REMOVE_DISK,
+                             makedev(disk.major, disk.minor));
+               }
+               /* Now fail anything left */
+               ioctl(fd, GET_ARRAY_INFO, &array);
+               for (d = 0, found = 0;
+                    d < MAX_DISKS && found < array.nr_disks;
+                    d++) {
+                       int cnt;
+                       mdu_disk_info_t disk;
+                       disk.number = d;
+                       if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+                               continue;
+                       if (disk.major == 0 && disk.minor == 0)
+                               continue;
+                       found++;
+                       if ((disk.state & (1 << MD_DISK_ACTIVE))
+                           && disk.raid_disk < data_disks)
+                               /* keep this */
+                               continue;
+                       ioctl(fd, SET_DISK_FAULTY,
+                             makedev(disk.major, disk.minor));
+                       cnt = 5;
+                       while (ioctl(fd, HOT_REMOVE_DISK,
+                                    makedev(disk.major, disk.minor)) < 0
+                              && errno == EBUSY
+                              && cnt--) {
+                               usleep(10000);
+                       }
+               }
+       }
+       c = map_num(pers, level);
+       if (c) {
+               int err = sysfs_set_str(&info, NULL, "level", c);
+               if (err) {
+                       err = errno;
+                       pr_err("%s: could not set level to %s\n",
+                               devname, c);
+                       if (err == EBUSY &&
+                           (array.state & (1<<MD_SB_BITMAP_PRESENT)))
+                               cont_err("Bitmap must be removed"
+                                        " before level can be changed\n");
+                       return err;
+               }
+               if (verbose >= 0)
+                       pr_err("level of %s changed to %s\n",
+                               devname, c);
+       }
+       return 0;
+}
+
+int sigterm = 0;
+static void catch_term(int sig)
+{
+       sigterm = 1;
+}
+
+static int continue_via_systemd(char *devnm)
+{
+       int skipped, i, pid, status;
+       char pathbuf[1024];
+       /* In a systemd/udev world, it is best to get systemd to
+        * run "mdadm --grow --continue" rather than running in the
+        * background.
+        */
+       switch(fork()) {
+       case  0:
+               /* FIXME yuk. CLOSE_EXEC?? */
+               skipped = 0;
+               for (i = 3; skipped < 20; i++)
+                       if (close(i) < 0)
+                               skipped++;
+                       else
+                               skipped = 0;
+
+               /* Don't want to see error messages from
+                * systemctl.  If the service doesn't exist,
+                * we fork ourselves.
+                */
+               close(2);
+               open("/dev/null", O_WRONLY);
+               snprintf(pathbuf, sizeof(pathbuf), "mdadm-grow-continue@%s.service",
+                        devnm);
+               status = execl("/usr/bin/systemctl", "systemctl",
+                              "start",
+                              pathbuf, NULL);
+               status = execl("/bin/systemctl", "systemctl", "start",
+                              pathbuf, NULL);
+               exit(1);
+       case -1: /* Just do it ourselves. */
+               break;
+       default: /* parent - good */
+               pid = wait(&status);
+               if (pid >= 0 && status == 0)
+                       return 1;
+       }
+       return 0;
+}
+
 static int reshape_array(char *container, int fd, char *devname,
                         struct supertype *st, struct mdinfo *info,
                         int force, struct mddev_dev *devlist,
@@ -2430,7 +2811,7 @@ static int reshape_array(char *container, int fd, char *devname,
        int spares_needed;
        char *msg;
        int orig_level = UnSet;
-       int disks, odisks;
+       int odisks;
        int delayed;
 
        struct mdu_array_info_s array;
@@ -2445,10 +2826,10 @@ static int reshape_array(char *container, int fd, char *devname,
        int nrdisks;
        int err;
        unsigned long blocks;
-       unsigned long cache;
        unsigned long long array_size;
        int done;
        struct mdinfo *sra = NULL;
+       char buf[20];
 
        /* when reshaping a RAID0, the component_size might be zero.
         * So try to fix that up.
@@ -2471,7 +2852,7 @@ static int reshape_array(char *container, int fd, char *devname,
                info->new_level = UnSet;
                if (info->delta_disks > 0)
                        info->array.raid_disks -= info->delta_disks;
-               msg = analyse_change(info, &reshape);
+               msg = analyse_change(devname, info, &reshape);
                info->new_level = new_level;
                if (info->delta_disks > 0)
                        info->array.raid_disks += info->delta_disks;
@@ -2479,9 +2860,11 @@ static int reshape_array(char *container, int fd, char *devname,
                        /* Make sure the array isn't read-only */
                        ioctl(fd, RESTART_ARRAY_RW, 0);
        } else
-               msg = analyse_change(info, &reshape);
+               msg = analyse_change(devname, info, &reshape);
        if (msg) {
-               pr_err("%s\n", msg);
+               /* if msg == "", error has already been printed */
+               if (msg[0])
+                       pr_err("%s\n", msg);
                goto release;
        }
        if (restart &&
@@ -2494,7 +2877,9 @@ static int reshape_array(char *container, int fd, char *devname,
                goto release;
        }
 
-       if (st->ss->external && restart && (info->reshape_progress == 0)) {
+       if (st->ss->external && restart && (info->reshape_progress == 0) &&
+           !((sysfs_get_str(info, NULL, "sync_action", buf, sizeof(buf)) > 0) &&
+             (strncmp(buf, "reshape", 7) == 0))) {
                /* When reshape is restarted from '0', very begin of array
                 * it is possible that for external metadata reshape and array
                 * configuration doesn't happen.
@@ -2510,6 +2895,22 @@ static int reshape_array(char *container, int fd, char *devname,
                /* reshape already started. just skip to monitoring the reshape */
                if (reshape.backup_blocks == 0)
                        return 0;
+               if (restart & RESHAPE_NO_BACKUP)
+                       return 0;
+
+               /* Need 'sra' down at 'started:' */
+               sra = sysfs_read(fd, NULL,
+                                GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK|
+                                GET_CACHE);
+               if (!sra) {
+                       pr_err("%s: Cannot get array details from sysfs\n",
+                              devname);
+                       goto release;
+               }
+
+               if (!backup_file)
+                       backup_file = locate_backup(sra->sys_name);
+
                goto started;
        }
        /* The container is frozen but the array may not be.
@@ -2552,37 +2953,23 @@ static int reshape_array(char *container, int fd, char *devname,
        }
 
        if (reshape.level != array.level) {
-               char *c = map_num(pers, reshape.level);
-               int err;
-               if (c == NULL)
-                       goto release;
-
-               err = sysfs_set_str(info, NULL, "level", c);
-               if (err) {
-                       err = errno;
-                       pr_err("%s: could not set level to %s\n",
-                               devname, c);
-                       if (err == EBUSY &&
-                           (info->array.state & (1<<MD_SB_BITMAP_PRESENT)))
-                               cont_err("Bitmap must be removed"
-                                        " before level can be changed\n");
+               int err = impose_level(fd, reshape.level, devname, verbose);
+               if (err)
                        goto release;
-               }
-               if (verbose >= 0)
-                       pr_err("level of %s changed to %s\n",
-                               devname, c);
+               info->new_layout = UnSet; /* after level change,
+                                          * layout is meaningless */
                orig_level = array.level;
                sysfs_freeze_array(info);
 
                if (reshape.level > 0 && st->ss->external) {
                        /* make sure mdmon is aware of the new level */
-                       if (mdmon_running(st->container_dev))
+                       if (mdmon_running(container))
                                flush_mdmon(container);
 
-                       if (!mdmon_running(st->container_dev))
-                               start_mdmon(st->container_dev);
+                       if (!mdmon_running(container))
+                               start_mdmon(container);
                        ping_monitor(container);
-                       if (mdmon_running(st->container_dev) &&
+                       if (mdmon_running(container) &&
                            st->update_tail == NULL)
                                st->update_tail = &st->updates;
                }
@@ -2599,7 +2986,7 @@ static int reshape_array(char *container, int fd, char *devname,
                struct mdinfo *d;
 
                if (info2) {
-                       sysfs_init(info2, fd, st->devnum);
+                       sysfs_init(info2, fd, st->devnm);
                        /* When increasing number of devices, we need to set
                         * new raid_disks before adding these, or they might
                         * be rejected.
@@ -2629,6 +3016,8 @@ static int reshape_array(char *container, int fd, char *devname,
                Manage_subdevs(devname, fd, devlist, verbose,
                               0,NULL, 0);
 
+       if (reshape.backup_blocks == 0 && data_offset != INVALID_SECTORS)
+               reshape.backup_blocks = reshape.before.data_disks * info->array.chunk_size/512;
        if (reshape.backup_blocks == 0) {
                /* No restriping needed, but we might need to impose
                 * some more changes: layout, raid_disks, chunk_size
@@ -2719,10 +3108,8 @@ static int reshape_array(char *container, int fd, char *devname,
                goto release;
        }
 
-started:
-
        if (array.level == 10) {
-               /* Reshaping RAID10 does not require and data backup by
+               /* Reshaping RAID10 does not require any data backup by
                 * user-space.  Instead it requires that the data_offset
                 * is changed to avoid the need for backup.
                 * So this is handled very separately
@@ -2734,7 +3121,7 @@ started:
                                      &reshape, data_offset,
                                      force, verbose);
        }
-       sra = sysfs_read(fd, 0,
+       sra = sysfs_read(fd, NULL,
                         GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK|
                         GET_CACHE);
        if (!sra) {
@@ -2743,6 +3130,58 @@ started:
                goto release;
        }
 
+       if (!backup_file)
+               switch(set_new_data_offset(sra, st, devname,
+                                          reshape.after.data_disks - reshape.before.data_disks,
+                                          data_offset,
+                                          reshape.min_offset_change, 1)) {
+       case -1:
+               goto release;
+       case 0:
+               /* Updated data_offset, so it's easy now */
+               update_cache_size(container, sra, info,
+                                 min(reshape.before.data_disks,
+                                     reshape.after.data_disks),
+                                 reshape.backup_blocks);
+
+               /* Right, everything seems fine. Let's kick things off.
+                */
+               sync_metadata(st);
+
+               if (impose_reshape(sra, info, st, fd, restart,
+                                  devname, container, &reshape) < 0)
+                       goto release;
+               if (sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0) {
+                       pr_err("Failed to initiate reshape!\n");
+                       goto release;
+               }
+               if (info->new_level == reshape.level)
+                       return 0;
+               /* need to adjust level when reshape completes */
+               switch(fork()) {
+               case -1: /* ignore error, but don't wait */
+                       return 0;
+               default: /* parent */
+                       return 0;
+               case 0:
+                       map_fork();
+                       break;
+               }
+               close(fd);
+               wait_reshape(sra);
+               fd = open_dev(sra->sys_name);
+               if (fd >= 0)
+                       impose_level(fd, info->new_level, devname, verbose);
+               return 0;
+       case 1: /* Couldn't set data_offset, try the old way */
+               if (data_offset != INVALID_SECTORS) {
+                       pr_err("Cannot update data_offset on this array\n");
+                       goto release;
+               }
+               break;
+       }
+
+started:
        /* Decide how many blocks (sectors) for a reshape
         * unit.  The number we have so far is just a minimum
         */
@@ -2787,8 +3226,9 @@ started:
                if (backup_file == NULL) {
                        if (reshape.after.data_disks <=
                            reshape.before.data_disks) {
-                               pr_err("%s: Cannot grow - "
-                                       "need backup-file\n", devname);
+                               pr_err("%s: Cannot grow - need backup-file\n",
+                                      devname);
+                               pr_err(" Please provide one with \"--backup=...\"\n");
                                goto release;
                        } else if (sra->array.spare_disks == 0) {
                                pr_err("%s: Cannot grow - "
@@ -2800,6 +3240,7 @@ started:
                        if (!reshape_open_backup_file(backup_file, fd, devname,
                                                      (signed)blocks,
                                                      fdlist+d, offsets+d,
+                                                     sra->sys_name,
                                                      restart)) {
                                goto release;
                        }
@@ -2807,23 +3248,9 @@ started:
                }
        }
 
-       /* lastly, check that the internal stripe cache is
-        * large enough, or it won't work.
-        * It must hold at least 4 stripes of the larger
-        * chunk size
-        */
-       cache = max(info->array.chunk_size, info->new_chunk);
-       cache *= 4; /* 4 stripes minimum */
-       cache /= 512; /* convert to sectors */
-       disks = min(reshape.before.data_disks, reshape.after.data_disks);
-       /* make sure there is room for 'blocks' with a bit to spare */
-       if (cache < 16 + blocks / disks)
-               cache = 16 + blocks / disks;
-       cache /= (4096/512); /* Covert from sectors to pages */
-
-       if (sra->cache_size < cache)
-               subarray_set_num(container, sra, "stripe_cache_size",
-                                cache+1);
+       update_cache_size(container, sra, info,
+                         min(reshape.before.data_disks, reshape.after.data_disks),
+                         blocks);
 
        /* Right, everything seems fine. Let's kick things off.
         * If only changing raid_disks, use ioctl, else use
@@ -2831,70 +3258,9 @@ started:
         */
        sync_metadata(st);
 
-       sra->new_chunk = info->new_chunk;
-
-       if (restart) {
-               /* for external metadata checkpoint saved by mdmon can be lost
-                * or missed /due to e.g. crash/. Check if md is not during
-                * restart farther than metadata points to.
-                * If so, this means metadata information is obsolete.
-                */
-               if (st->ss->external)
-                       verify_reshape_position(info, reshape.level);
-               sra->reshape_progress = info->reshape_progress;
-       } else {
-               sra->reshape_progress = 0;
-               if (reshape.after.data_disks < reshape.before.data_disks)
-                       /* start from the end of the new array */
-                       sra->reshape_progress = (sra->component_size
-                                                * reshape.after.data_disks);
-       }
-
-       if (info->array.chunk_size == info->new_chunk &&
-           reshape.before.layout == reshape.after.layout &&
-           st->ss->external == 0) {
-               /* use SET_ARRAY_INFO but only if reshape hasn't started */
-               ioctl(fd, GET_ARRAY_INFO, &array);
-               array.raid_disks = reshape.after.data_disks + reshape.parity;
-               if (!restart &&
-                   ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
-                       int err = errno;
-
-                       pr_err("Cannot set device shape for %s: %s\n",
-                              devname, strerror(errno));
-
-                       if (err == EBUSY &&
-                           (array.state & (1<<MD_SB_BITMAP_PRESENT)))
-                               cont_err("Bitmap must be removed before"
-                                        " shape can be changed\n");
-
-                       goto release;
-               }
-       } else if (!restart) {
-               /* set them all just in case some old 'new_*' value
-                * persists from some earlier problem.
-                */
-               int err = 0;
-               if (sysfs_set_num(sra, NULL, "chunk_size", info->new_chunk) < 0)
-                       err = errno;
-               if (!err && sysfs_set_num(sra, NULL, "layout",
-                                         reshape.after.layout) < 0)
-                       err = errno;
-               if (!err && subarray_set_num(container, sra, "raid_disks",
-                                            reshape.after.data_disks +
-                                            reshape.parity) < 0)
-                       err = errno;
-               if (err) {
-                       pr_err("Cannot set device shape for %s\n",
-                               devname);
-
-                       if (err == EBUSY &&
-                           (array.state & (1<<MD_SB_BITMAP_PRESENT)))
-                               cont_err("Bitmap must be removed before"
-                                        " shape can be changed\n");
-                       goto release;
-               }
-       }
+       if (impose_reshape(sra, info, st, fd, restart,
+                          devname, container, &reshape) < 0)
+               goto release;
 
        err = start_reshape(sra, restart, reshape.before.data_disks,
                            reshape.after.data_disks);
@@ -2916,6 +3282,14 @@ started:
                return 1;
        }
 
+       if (!forked && !check_env("MDADM_NO_SYSTEMCTL"))
+               if (continue_via_systemd(container ?: sra->sys_name)) {
+                       free(fdlist);
+                       free(offsets);
+                       sysfs_free(sra);
+                       return 0;
+               }
+
        /* Now we just need to kick off the reshape and watch, while
         * handling backups of the data...
         * This is all done by a forked background process.
@@ -2946,9 +3320,9 @@ started:
        do {
                struct mdstat_ent *mds, *m;
                delayed = 0;
-               mds = mdstat_read(0, 0);
-               for (m = mds; m; m = mds->next)
-                       if (m->devnum == devname2devnum(sra->sys_name)) {
+               mds = mdstat_read(1, 0);
+               for (m = mds; m; m = m->next)
+                       if (strcmp(m->devnm, sra->sys_name) == 0) {
                                if (m->resync &&
                                    m->percent == RESYNC_DELAYED)
                                        delayed = 1;
@@ -2966,9 +3340,9 @@ started:
                        delayed = 0;
                }
                if (delayed)
-                       sleep(30 - (delayed-1) * 25);
+                       mdstat_wait(30 - (delayed-1) * 25);
        } while (delayed);
-
+       mdstat_close();
        close(fd);
        if (check_env("MDADM_GROW_VERIFY"))
                fd = open(devname, O_RDONLY | O_DIRECT);
@@ -2976,6 +3350,8 @@ started:
                fd = -1;
        mlockall(MCL_FUTURE);
 
+       signal(SIGTERM, catch_term);
+
        if (st->ss->external) {
                /* metadata handler takes it from here */
                done = st->ss->manage_reshape(
@@ -2993,8 +3369,21 @@ started:
        free(fdlist);
        free(offsets);
 
-       if (backup_file && done)
+       if (backup_file && done) { /* reshape finished: the backup is no longer needed */
+               char *bul;
+               bul = make_backup(sra->sys_name);
+               if (bul) {
+                       char buf[1024];
+                       int l = readlink(bul, buf, sizeof(buf) - 1); /* readlink() does not NUL-terminate: keep one byte free */
+                       if (l > 0) {
+                               buf[l]=0;
+                               unlink(buf);
+                       }
+                       unlink(bul);
+                       free(bul);
+               }
                 unlink(backup_file);
+       }
        if (!done) {
                abort_reshape(sra);
                goto out;
@@ -3015,7 +3404,7 @@ started:
 
        if (st->ss->external) {
                /* Re-load the metadata as much could have changed */
-               int cfd = open_dev(st->container_dev);
+               int cfd = open_dev(st->container_devnm);
                if (cfd >= 0) {
                        flush_mdmon(container);
                        st->ss->free_super(st);
@@ -3033,14 +3422,10 @@ started:
                set_array_size(st, info, info->text_version);
 
        if (info->new_level != reshape.level) {
-
-               c = map_num(pers, info->new_level);
-               if (c) {
-                       err = sysfs_set_str(sra, NULL, "level", c);
-                       if (err)
-                               pr_err("%s: could not set level "
-                                      "to %s\n", devname, c);
-               }
+               if (fd < 0)
+                       fd = open(devname, O_RDONLY);
+               impose_level(fd, info->new_level, devname, verbose);
+               close(fd);
                if (info->new_level == 0)
                        st->update_tail = NULL;
        }
@@ -3072,12 +3457,12 @@ int reshape_container(char *container, char *devname,
                      struct supertype *st,
                      struct mdinfo *info,
                      int force,
-                     char *backup_file,
-                     int verbose, int restart, int freeze_reshape)
+                     char *backup_file, int verbose,
+                     int forked, int restart, int freeze_reshape)
 {
        struct mdinfo *cc = NULL;
        int rv = restart;
-       int last_devnum = -1;
+       char last_devnm[32] = "";
 
        /* component_size is not meaningful for a container,
         * so pass '0' meaning 'no change'
@@ -3098,15 +3483,19 @@ int reshape_container(char *container, char *devname,
         */
        ping_monitor(container);
 
-       switch (fork()) {
+       if (!forked && !freeze_reshape && !check_env("MDADM_NO_SYSTEMCTL"))
+               if (continue_via_systemd(container))
+                       return 0;
+
+       switch (forked ? 0 : fork()) {
        case -1: /* error */
                perror("Cannot fork to complete reshape\n");
                unfreeze(st);
                return 1;
        default: /* parent */
                if (!freeze_reshape)
-                       printf(Name ": multi-array reshape continues"
-                              " in background\n");
+                       printf("%s: multi-array reshape continues"
+                              " in background\n", Name);
                return 0;
        case 0: /* child */
                map_fork();
@@ -3134,6 +3523,7 @@ int reshape_container(char *container, char *devname,
                int fd;
                struct mdstat_ent *mdstat;
                char *adev;
+               int devid;
 
                sysfs_free(cc);
 
@@ -3145,13 +3535,12 @@ int reshape_container(char *container, char *devname,
                                continue;
 
                        subarray = strchr(content->text_version+1, '/')+1;
-                       mdstat = mdstat_by_subdev(subarray,
-                                                 devname2devnum(container));
+                       mdstat = mdstat_by_subdev(subarray, container);
                        if (!mdstat)
                                continue;
                        if (mdstat->active == 0) {
-                               pr_err("Skipping inactive "
-                                       "array md%i.\n", mdstat->devnum);
+                               pr_err("Skipping inactive array %s.\n",
+                                      mdstat->devnm);
                                free_mdstat(mdstat);
                                mdstat = NULL;
                                continue;
@@ -3161,20 +3550,18 @@ int reshape_container(char *container, char *devname,
                if (!content)
                        break;
 
-               adev = map_dev(dev2major(mdstat->devnum),
-                              dev2minor(mdstat->devnum),
-                              0);
+               devid = devnm2devid(mdstat->devnm);
+               adev = map_dev(major(devid), minor(devid), 0);
                if (!adev)
                        adev = content->text_version;
 
-               fd = open_dev(mdstat->devnum);
+               fd = open_dev(mdstat->devnm);
                if (fd < 0) {
-                       printf(Name ": Device %s cannot be opened for reshape.",
-                              adev);
+                       pr_err("Device %s cannot be opened for reshape.\n", adev);
                        break;
                }
 
-               if (last_devnum == mdstat->devnum) {
+               if (strcmp(last_devnm, mdstat->devnm) == 0) {
                        /* Do not allow for multiple reshape_array() calls for
                         * the same array.
                         * It can happen when reshape_array() returns without
@@ -3185,20 +3572,20 @@ int reshape_container(char *container, char *devname,
                         * This is possibly interim until the behaviour of
                         * reshape_array is resolved().
                         */
-                       printf(Name ": Multiple reshape execution detected for "
-                              "device  %s.", adev);
+                       printf("%s: Multiple reshape execution detected for "
+                              "device  %s.\n", Name, adev);
                        close(fd);
                        break;
                }
-               last_devnum = mdstat->devnum;
+               strcpy(last_devnm, mdstat->devnm);
 
-               sysfs_init(content, fd, mdstat->devnum);
+               sysfs_init(content, fd, mdstat->devnm);
 
-               if (mdmon_running(devname2devnum(container)))
+               if (mdmon_running(container))
                        flush_mdmon(container);
 
                rv = reshape_array(container, fd, adev, st,
-                                  content, force, NULL, 0ULL,
+                                  content, force, NULL, INVALID_SECTORS,
                                   backup_file, verbose, 1, restart,
                                   freeze_reshape);
                close(fd);
@@ -3212,7 +3599,7 @@ int reshape_container(char *container, char *devname,
                if (rv)
                        break;
 
-               if (mdmon_running(devname2devnum(container)))
+               if (mdmon_running(container))
                        flush_mdmon(container);
        }
        if (!rv)
@@ -3249,7 +3636,7 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
                     unsigned long long backup_point,
                     unsigned long long wait_point,
                     unsigned long long *suspend_point,
-                    unsigned long long *reshape_completed)
+                    unsigned long long *reshape_completed, int *frozen)
 {
        /* This function is called repeatedly by the reshape manager.
         * It determines how much progress can safely be made and allows
@@ -3466,7 +3853,8 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
                wait_point = info->component_size - wait_point;
        }
 
-       sysfs_set_num(info, NULL, "sync_max", max_progress);
+       if (!*frozen)
+               sysfs_set_num(info, NULL, "sync_max", max_progress);
 
        /* Now wait.  If we have already reached the point that we were
         * asked to wait to, don't wait at all, else wait for any change.
@@ -3486,7 +3874,6 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
                 * waiting forever on a dead array
                 */
                char action[20];
-               fd_set rfds;
                if (sysfs_get_str(info, NULL, "sync_action",
                                  action, 20) <= 0 ||
                    strncmp(action, "reshape", 7) != 0)
@@ -3502,9 +3889,7 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
                    && info->reshape_progress < (info->component_size
                                                 * reshape->after.data_disks))
                        break;
-               FD_ZERO(&rfds);
-               FD_SET(fd, &rfds);
-               select(fd+1, NULL, NULL, &rfds, NULL);
+               sysfs_wait(fd, NULL);
                if (sysfs_fd_get_ll(fd, &completed) < 0)
                        goto check_progress;
        }
@@ -3549,23 +3934,24 @@ check_progress:
                /* The abort might only be temporary.  Wait up to 10
                 * seconds for fd to contain a valid number again.
                 */
-               struct timeval tv;
+               int wait = 10000;
                int rv = -2;
-               tv.tv_sec = 10;
-               tv.tv_usec = 0;
-               while (fd >= 0 && rv < 0 && tv.tv_sec > 0) {
-                       fd_set rfds;
-                       FD_ZERO(&rfds);
-                       FD_SET(fd, &rfds);
-                       if (select(fd+1, NULL, NULL, &rfds, &tv) != 1)
+               unsigned long long new_sync_max;
+               while (fd >= 0 && rv < 0 && wait > 0) {
+                       if (sysfs_wait(fd, &wait) != 1)
                                break;
                        switch (sysfs_fd_get_ll(fd, &completed)) {
                        case 0:
                                /* all good again */
                                rv = 1;
+                               /* If "sync_max" is no longer max_progress
+                                * we need to freeze things
+                                */
+                               sysfs_get_ll(info, NULL, "sync_max", &new_sync_max);
+                               *frozen = (new_sync_max != max_progress);
                                break;
                        case -2: /* read error - abort */
-                               tv.tv_sec = 0;
+                               wait = 0;
                                break;
                        }
                }
@@ -3857,6 +4243,7 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
        struct mdinfo *sd;
        unsigned long stripes;
        int uuid[4];
+       int frozen = 0;
 
        /* set up the backup-super-block.  This requires the
         * uuid from the array.
@@ -3934,9 +4321,11 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
                                wait_point = __le64_to_cpu(bsb.arraystart2);
                }
 
+               reshape_completed = sra->reshape_progress;
                rv = progress_reshape(sra, reshape,
                                      backup_point, wait_point,
-                                     &suspend_point, &reshape_completed);
+                                     &suspend_point, &reshape_completed,
+                                     &frozen);
                /* external metadata would need to ping_monitor here */
                sra->reshape_progress = reshape_completed;
 
@@ -3962,7 +4351,8 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
                                forget_backup(dests, destfd,
                                              destoffsets, 1);
                }
-
+               if (sigterm)
+                       rv = -2;
                if (rv < 0) {
                        if (rv == -1)
                                done = 1;
@@ -3970,6 +4360,7 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
                }
                if (rv == 0 && increasing && !st->ss->external) {
                        /* No longer need to monitor this reshape */
+                       sysfs_set_str(sra, NULL, "sync_max", "max");
                        done = 1;
                        break;
                }
@@ -4023,7 +4414,12 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
        }
 
        /* FIXME maybe call progress_reshape one more time instead */
-       abort_reshape(sra); /* remove any remaining suspension */
+       /* remove any remaining suspension */
+       sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
+       sysfs_set_num(sra, NULL, "suspend_hi", 0);
+       sysfs_set_num(sra, NULL, "suspend_lo", 0);
+       sysfs_set_num(sra, NULL, "sync_min", 0);
+
        if (reshape->before.data_disks == reshape->after.data_disks)
                sysfs_set_num(sra, NULL, "sync_speed_min", speed);
        free(buf);
@@ -4214,7 +4610,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                        st->ss->free_super(st);
                        offsets[j] = dinfo.data_offset * 512;
                }
-               printf(Name ": restoring critical section\n");
+               printf("%s: restoring critical section\n", Name);
 
                if (restore_stripes(fdlist, offsets,
                                    info->array.raid_disks,
@@ -4358,7 +4754,6 @@ int Grow_continue_command(char *devname, int fd,
        char *subarray = NULL;
        struct mdinfo *cc = NULL;
        struct mdstat_ent *mdstat = NULL;
-       char buf[40];
        int cfd = -1;
        int fd2 = -1;
 
@@ -4374,7 +4769,7 @@ int Grow_continue_command(char *devname, int fd,
        dprintf("Grow continue is run for ");
        if (st->ss->external == 0) {
                int d;
-               dprintf("native array (%s)\n", devname);
+               dprintf_cont("native array (%s)\n", devname);
                if (ioctl(fd, GET_ARRAY_INFO, &array.array) < 0) {
                        pr_err("%s is not an active md array -"
                                " aborting\n", devname);
@@ -4405,6 +4800,8 @@ int Grow_continue_command(char *devname, int fd,
                                continue;
                        err = st->ss->load_super(st, fd2, NULL);
                        close(fd2);
+                       /* invalidate fd2 to avoid possible double close() */
+                       fd2 = -1;
                        if (err)
                                continue;
                        break;
@@ -4417,17 +4814,17 @@ int Grow_continue_command(char *devname, int fd,
                }
                st->ss->getinfo_super(st, content, NULL);
        } else {
-               int container_dev;
+               char *container;
 
                if (subarray) {
-                       dprintf("subarray (%s)\n", subarray);
-                       container_dev = st->container_dev;
-                       cfd = open_dev_excl(st->container_dev);
+                       dprintf_cont("subarray (%s)\n", subarray);
+                       container = st->container_devnm;
+                       cfd = open_dev_excl(st->container_devnm);
                } else {
-                       container_dev = st->devnum;
+                       container = st->devnm;
                        close(fd);
-                       cfd = open_dev_excl(st->devnum);
-                       dprintf("container (%i)\n", container_dev);
+                       cfd = open_dev_excl(st->devnm);
+                       dprintf_cont("container (%s)\n", container);
                        fd = cfd;
                }
                if (cfd < 0) {
@@ -4436,7 +4833,6 @@ int Grow_continue_command(char *devname, int fd,
                        ret_val = 1;
                        goto Grow_continue_command_exit;
                }
-               fmt_devname(buf, container_dev);
 
                /* find in container array under reshape
                 */
@@ -4472,18 +4868,18 @@ int Grow_continue_command(char *devname, int fd,
                                pr_err("cannot continue reshape of an array"
                                       " in container with unsupported"
                                       " metadata: %s(%s)\n",
-                                      devname, buf);
+                                      devname, container);
                                ret_val = 1;
                                goto Grow_continue_command_exit;
                        }
 
                        array = strchr(content->text_version+1, '/')+1;
-                       mdstat = mdstat_by_subdev(array, container_dev);
+                       mdstat = mdstat_by_subdev(array, container);
                        if (!mdstat)
                                continue;
                        if (mdstat->active == 0) {
-                               pr_err("Skipping inactive "
-                                       "array md%i.\n", mdstat->devnum);
+                               pr_err("Skipping inactive array %s.\n",
+                                      mdstat->devnm);
                                free_mdstat(mdstat);
                                mdstat = NULL;
                                continue;
@@ -4496,23 +4892,22 @@ int Grow_continue_command(char *devname, int fd,
                        ret_val = 1;
                        goto Grow_continue_command_exit;
                }
-               fd2 = open_dev(mdstat->devnum);
+               fd2 = open_dev(mdstat->devnm);
                if (fd2 < 0) {
-                       pr_err("cannot open (md%i)\n",
-                               mdstat->devnum);
+                       pr_err("cannot open (%s)\n", mdstat->devnm);
                        ret_val = 1;
                        goto Grow_continue_command_exit;
                }
 
-               sysfs_init(content, fd2, mdstat->devnum);
+               sysfs_init(content, fd2, mdstat->devnm);
 
                /* start mdmon in case it is not running
                 */
-               if (!mdmon_running(container_dev))
-                       start_mdmon(container_dev);
-               ping_monitor(buf);
+               if (!mdmon_running(container))
+                       start_mdmon(container);
+               ping_monitor(container);
 
-               if (mdmon_running(container_dev))
+               if (mdmon_running(container))
                        st->update_tail = &st->updates;
                else {
                        pr_err("No mdmon found. "
@@ -4532,7 +4927,7 @@ int Grow_continue_command(char *devname, int fd,
 
        /* continue reshape
         */
-       ret_val = Grow_continue(fd, st, content, backup_file, 0);
+       ret_val = Grow_continue(fd, st, content, backup_file, 1, 0);
 
 Grow_continue_command_exit:
        if (fd2 > -1)
@@ -4548,7 +4943,7 @@ Grow_continue_command_exit:
 }
 
 int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
-                 char *backup_file, int freeze_reshape)
+                 char *backup_file, int forked, int freeze_reshape)
 {
        int ret_val = 2;
 
@@ -4556,22 +4951,49 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
                return ret_val;
 
        if (st->ss->external) {
-               char container[40];
-               int cfd = open_dev(st->container_dev);
+               int cfd = open_dev(st->container_devnm);
 
                if (cfd < 0)
                        return 1;
 
-               fmt_devname(container, st->container_dev);
-               st->ss->load_container(st, cfd, container);
+               st->ss->load_container(st, cfd, st->container_devnm);
                close(cfd);
-               ret_val = reshape_container(container, NULL, mdfd,
+               ret_val = reshape_container(st->container_devnm, NULL, mdfd,
                                            st, info, 0, backup_file,
-                                           0, 1, freeze_reshape);
+                                           0, forked,
+                                           1 | info->reshape_active,
+                                           freeze_reshape);
        } else
                ret_val = reshape_array(NULL, mdfd, "array", st, info, 1,
-                                       NULL, 0ULL, backup_file, 0, 0, 1,
+                                       NULL, INVALID_SECTORS,
+                                       backup_file, 0, forked,
+                                       1 | info->reshape_active,
                                        freeze_reshape);
 
        return ret_val;
 }
+
+/* Build "MAP_DIR/backup_file-<name>" in a buffer obtained from xmalloc(). */
+char *make_backup(char *name)
+{
+       static const char prefix[] = "backup_file-";
+       /* directory + '/' + prefix + name + terminating NUL */
+       size_t need = strlen(MAP_DIR) + 1 + strlen(prefix) + strlen(name) + 1;
+       char *path = xmalloc(need);
+
+       snprintf(path, need, "%s/%s%s", MAP_DIR, prefix, name);
+       return path;
+}
+
+/* Return make_backup(name) if it is an existing regular file, else NULL. */
+char *locate_backup(char *name)
+{
+       struct stat sb;
+       char *path = make_backup(name);
+
+       if (stat(path, &sb) != 0 || !S_ISREG(sb.st_mode)) {
+               free(path);     /* unusable: release it, report absence */
+               path = NULL;
+       }
+       return path;
+}