git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Grow.c
tests: add test that DDF marks missing devices as failed on assembly.
[thirdparty/mdadm.git] / Grow.c
diff --git a/Grow.c b/Grow.c
index 7b9cc70e2fdfe1db46d7f0088743fe166feb63bd..6f556f65af78749c70c7287c298342ec42806c92 100644 (file)
--- a/Grow.c
+++ b/Grow.c
@@ -1,7 +1,7 @@
 /*
  * mdadm - manage Linux "md" devices aka RAID arrays.
  *
- * Copyright (C) 2001-2012 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
  *
  *
  *    This program is free software; you can redistribute it and/or modify
@@ -24,6 +24,8 @@
 #include       "mdadm.h"
 #include       "dlink.h"
 #include       <sys/mman.h>
+#include       <stdint.h>
+#include       <signal.h>
 
 #if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN)
 #error no endian defined
@@ -51,6 +53,7 @@ int restore_backup(struct supertype *st,
        dprintf("Called restore_backup()\n");
        fdlist = xmalloc(sizeof(int) * disk_count);
 
+       enable_fds(next_spare);
        for (i = 0; i < next_spare; i++)
                fdlist[i] = -1;
        for (dev = content->devs; dev; dev = dev->next) {
@@ -609,9 +612,14 @@ static void unfreeze(struct supertype *st)
                return unfreeze_container(st);
        else {
                struct mdinfo *sra = sysfs_read(-1, st->devnm, GET_VERSION);
+               char buf[20];
 
-               if (sra)
+               if (sra &&
+                   sysfs_get_str(sra, NULL, "sync_action", buf, 20) > 0
+                   && strcmp(buf, "frozen\n") == 0) {
+                       printf("unfreeze\n");
                        sysfs_set_str(sra, NULL, "sync_action", "idle");
+               }
                sysfs_free(sra);
        }
 }
@@ -624,13 +632,9 @@ static void wait_reshape(struct mdinfo *sra)
        if (fd < 0)
                return;
 
-       while  (sysfs_fd_get_str(fd, action, 20) > 0 &&
-               strncmp(action, "reshape", 7) == 0) {
-               fd_set rfds;
-               FD_ZERO(&rfds);
-               FD_SET(fd, &rfds);
-               select(fd+1, NULL, NULL, &rfds, NULL);
-       }
+       while (sysfs_fd_get_str(fd, action, 20) > 0 &&
+              strncmp(action, "reshape", 7) == 0)
+               sysfs_wait(fd, NULL);
        close(fd);
 }
 
@@ -731,7 +735,8 @@ void abort_reshape(struct mdinfo *sra)
        sysfs_set_num(sra, NULL, "suspend_hi", 0);
        sysfs_set_num(sra, NULL, "suspend_lo", 0);
        sysfs_set_num(sra, NULL, "sync_min", 0);
-       sysfs_set_str(sra, NULL, "sync_max", "max");
+       // It isn't safe to reset sync_max as we aren't monitoring.
+       // Array really should be stopped at this point.
 }
 
 int remove_disks_for_takeover(struct supertype *st,
@@ -837,6 +842,7 @@ int reshape_prepare_fdlist(char *devname,
        int d = 0;
        struct mdinfo *sd;
 
+       enable_fds(nrdisks);
        for (d = 0; d <= nrdisks; d++)
                fdlist[d] = -1;
        d = raid_disks;
@@ -930,17 +936,6 @@ int reshape_open_backup_file(char *backup_file,
        return 1;
 }
 
-unsigned long GCD(unsigned long a, unsigned long b)
-{
-       while (a != b) {
-               if (a < b)
-                       b -= a;
-               if (b < a)
-                       a -= b;
-       }
-       return a;
-}
-
 unsigned long compute_backup_blocks(int nchunk, int ochunk,
                                    unsigned int ndata, unsigned int odata)
 {
@@ -960,7 +955,7 @@ unsigned long compute_backup_blocks(int nchunk, int ochunk,
        return blocks;
 }
 
-char *analyse_change(struct mdinfo *info, struct reshape *re)
+char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
 {
        /* Based on the current array state in info->array and
         * the changes in info->new_* etc, determine:
@@ -1001,9 +996,16 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                        /* chunk size is meaningful, must divide component_size
                         * evenly
                         */
-                       if (info->component_size % (info->new_chunk/512))
-                               return "New chunk size does not"
-                                       " divide component size";
+                       if (info->component_size % (info->new_chunk/512)) {
+                               unsigned long long shrink = info->component_size;
+                               shrink &= ~(unsigned long long)(info->new_chunk/512-1);
+                               pr_err("New chunk size (%dK) does not evenly divide device size (%lluk)\n",
+                                      info->new_chunk/1024, info->component_size/2);
+                               pr_err("After shrinking any filesystem, \"mdadm --grow %s --size %llu\"\n",
+                                      devname, shrink/2);
+                               pr_err("will shrink the array so the given chunk size would work.\n");
+                               return "";
+                       }
                        break;
                default:
                        return "chunk size not meaningful for this level";
@@ -1202,11 +1204,15 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                        delta_parity = 1;
                        re->level = 5;
                        re->before.layout = ALGORITHM_PARITY_N;
+                       if (info->new_layout == UnSet)
+                               info->new_layout = map_name(r5layout, "default");
                        break;
                case 6:
                        delta_parity = 2;
                        re->level = 6;
                        re->before.layout = ALGORITHM_PARITY_N;
+                       if (info->new_layout == UnSet)
+                               info->new_layout = map_name(r6layout, "default");
                        break;
                default:
                        return "Impossible level change requested";
@@ -1409,6 +1415,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
        /* So we have a restripe operation, we need to calculate the number
         * of blocks per reshape operation.
         */
+       re->new_size = info->component_size * re->before.data_disks;
        if (info->new_chunk == 0)
                info->new_chunk = info->array.chunk_size;
        if (re->after.data_disks == re->before.data_disks &&
@@ -1534,7 +1541,6 @@ int Grow_reshape(char *devname, int fd,
        struct mdinfo info;
        struct mdinfo *sra;
 
-
        if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
                pr_err("%s is not an active md array - aborting\n",
                        devname);
@@ -2144,15 +2150,63 @@ static int verify_reshape_position(struct mdinfo *info, int level)
        return ret_val;
 }
 
+static unsigned long long choose_offset(unsigned long long lo,
+                                       unsigned long long hi,
+                                       unsigned long long min,
+                                       unsigned long long max)
+{
+       /* Pick a new data offset (units are sectors) between lo and hi.
+        * Start at the midpoint of lo..hi, then walk the bits from 1 up
+        * to (but not including) 2*1024 sectors = 1MB.  Whenever a bit
+        * is set in the current choice, clear it by adding or subtracting
+        * that bit, so the choice gains one more bit of alignment.
+        * A step is only taken if the result stays within min..max; if
+        * both directions fit, prefer the one whose result is furthest
+        * from the lo/hi boundary it moves towards.  If neither fits,
+        * stop early and return the choice as it stands.
+        *
+        * NOTE(review): the initial midpoint is never compared with
+        * min..max; confirm callers guarantee it starts in range.
+        */
+       unsigned long long choice = (lo + hi) / 2;
+       unsigned long long bit = 1;
+
+       for (bit = 1; bit < 2*1024; bit = bit << 1) {
+               unsigned long long bigger, smaller;
+               if (! (bit & choice))
+                       continue; /* this bit is already clear */
+               bigger = choice + bit;
+               smaller = choice - bit;
+               if (bigger > max && smaller < min)
+                       break; /* neither direction stays in range */
+               if (bigger > max)
+                       choice = smaller;
+               else if (smaller < min)
+                       choice = bigger;
+               else if (hi - bigger > smaller - lo)
+                       choice = bigger;
+               else
+                       choice = smaller;
+       }
+       return choice;
+}
+
 static int set_new_data_offset(struct mdinfo *sra, struct supertype *st,
                               char *devname, int delta_disks,
                               unsigned long long data_offset,
-                              unsigned long long min)
+                              unsigned long long min,
+                              int can_fallback)
 {
        struct mdinfo *sd;
        int dir = 0;
        int err = 0;
+       unsigned long long before, after;
 
+       /* Need to find min space before and after so same is used
+        * on all devices
+        */
+       before = UINT64_MAX;
+       after = UINT64_MAX;
        for (sd = sra->devs; sd; sd = sd->next) {
                char *dn;
                int dfd;
@@ -2186,110 +2240,147 @@ static int set_new_data_offset(struct mdinfo *sra, struct supertype *st,
                        /* Metadata doesn't support data_offset changes */
                        return 1;
                }
+               if (before > info2.space_before)
+                       before = info2.space_before;
+               if (after > info2.space_after)
+                       after = info2.space_after;
+
+               if (data_offset != INVALID_SECTORS) {
+                       if (dir == 0) {
+                               if (info2.data_offset == data_offset) {
+                                       pr_err("%s: already has that data_offset\n",
+                                              dn);
+                                       goto release;
+                               }
+                               if (data_offset < info2.data_offset)
+                                       dir = -1;
+                               else
+                                       dir = 1;
+                       } else if ((data_offset <= info2.data_offset && dir == 1) ||
+                                  (data_offset >= info2.data_offset && dir == -1)) {
+                               pr_err("%s: differing data offsets on devices make this --data-offset setting impossible\n",
+                                       dn);
+                               goto release;
+                       }
+               }
+       }
+       if (before == UINT64_MAX)
+               /* impossible really, there must be no devices */
+               return 1;
+
+       for (sd = sra->devs; sd; sd = sd->next) {
+               char *dn = map_dev(sd->disk.major, sd->disk.minor, 0);
+               unsigned long long new_data_offset;
+
+               if (sd->disk.state & (1<<MD_DISK_FAULTY))
+                       continue;
                if (delta_disks < 0) {
                        /* Don't need any space as array is shrinking
                         * just move data_offset up by min
                         */
                        if (data_offset == INVALID_SECTORS)
-                               info2.new_data_offset = info2.data_offset + min;
+                               new_data_offset = sd->data_offset + min;
                        else {
-                               if (data_offset < info2.data_offset + min) {
+                               if (data_offset < sd->data_offset + min) {
                                        pr_err("--data-offset too small for %s\n",
                                                dn);
                                        goto release;
                                }
-                               info2.new_data_offset = data_offset;
+                               new_data_offset = data_offset;
                        }
                } else if (delta_disks > 0) {
                        /* need space before */
-                       if (info2.space_before < min) {
+                       if (before < min) {
+                               if (can_fallback)
+                                       goto fallback;
                                pr_err("Insufficient head-space for reshape on %s\n",
                                        dn);
                                goto release;
                        }
                        if (data_offset == INVALID_SECTORS)
-                               info2.new_data_offset = info2.data_offset - min;
+                               new_data_offset = sd->data_offset - min;
                        else {
-                               if (data_offset > info2.data_offset - min) {
+                               if (data_offset > sd->data_offset - min) {
                                        pr_err("--data-offset too large for %s\n",
                                                dn);
                                        goto release;
                                }
-                               info2.new_data_offset = data_offset;
+                               new_data_offset = data_offset;
                        }
                } else {
                        if (dir == 0) {
-                               /* can move up or down. 'data_offset'
-                                * might guide us, otherwise choose
-                                * direction with most space
+                               /* can move up or down.  If 'data_offset'
+                                * was set we would have already decided,
+                                * so just choose direction with most space.
                                 */
-                               if (data_offset == INVALID_SECTORS) {
-                                       if (info2.space_before > info2.space_after)
-                                               dir = -1;
-                                       else
-                                               dir = 1;
-                               } else if (data_offset < info2.data_offset)
+                               if (before > after)
                                        dir = -1;
                                else
                                        dir = 1;
-                               sysfs_set_str(sra, NULL, "reshape_direction",
-                                             dir == 1 ? "backwards" : "forwards");
                        }
-                       switch (dir) {
-                       case 1: /* Increase data offset */
-                               if (info2.space_after < min) {
+                       sysfs_set_str(sra, NULL, "reshape_direction",
+                                     dir == 1 ? "backwards" : "forwards");
+                       if (dir > 0) {
+                               /* Increase data offset */
+                               if (after < min) {
+                                       if (can_fallback)
+                                               goto fallback;
                                        pr_err("Insufficient tail-space for reshape on %s\n",
                                                dn);
                                        goto release;
                                }
                                if (data_offset != INVALID_SECTORS &&
-                                   data_offset < info2.data_offset + min) {
+                                   data_offset < sd->data_offset + min) {
                                        pr_err("--data-offset too small on %s\n",
                                                dn);
                                        goto release;
                                }
                                if (data_offset != INVALID_SECTORS)
-                                       info2.new_data_offset = data_offset;
-                               else {
-                                       unsigned long long off =
-                                               info2.space_after / 2;
-                                       off &= ~7ULL;
-                                       if (off < min)
-                                               off = min;
-                                       info2.new_data_offset =
-                                               info2.data_offset + off;
-                               }
-                               break;
-                       case -1: /* Decrease data offset */
-                               if (info2.space_before < min) {
+                                       new_data_offset = data_offset;
+                               else
+                                       new_data_offset = choose_offset(sd->data_offset,
+                                                                       sd->data_offset + after,
+                                                                       sd->data_offset + min,
+                                                                       sd->data_offset + after);
+                       } else {
+                               /* Decrease data offset */
+                               if (before < min) {
+                                       if (can_fallback)
+                                               goto fallback;
                                        pr_err("insufficient head-room on %s\n",
                                                dn);
                                        goto release;
                                }
                                if (data_offset != INVALID_SECTORS &&
-                                   data_offset < info2.data_offset - min) {
+                                   data_offset < sd->data_offset - min) {
                                        pr_err("--data-offset too small on %s\n",
                                                dn);
                                        goto release;
                                }
                                if (data_offset != INVALID_SECTORS)
-                                       info2.new_data_offset = data_offset;
-                               else {
-                                       unsigned long long off =
-                                               info2.space_before / 2;
-                                       off &= ~7ULL;
-                                       if (off < min)
-                                               off = min;
-                                       info2.new_data_offset =
-                                               info2.data_offset - off;
-                               }
-                               break;
+                                       new_data_offset = data_offset;
+                               else
+                                       new_data_offset = choose_offset(sd->data_offset - before,
+                                                                       sd->data_offset,
+                                                                       sd->data_offset - before,
+                                                                       sd->data_offset - min);
                        }
                }
-               if (sysfs_set_num(sra, sd, "new_offset",
-                                 info2.new_data_offset) < 0) {
-                       err = errno;
-                       err = -1;
+               err = sysfs_set_num(sra, sd, "new_offset", new_data_offset);
+               if (err < 0 && errno == E2BIG) {
+                       /* try again after increasing data size to max */
+                       err = sysfs_set_num(sra, sd, "size", 0);
+                       if (err < 0 && errno == EINVAL &&
+                           !(sd->disk.state & (1<<MD_DISK_SYNC))) {
+                               /* some kernels have a bug where you cannot
+                                * use '0' on spare devices. */
+                               sysfs_set_num(sra, sd, "size",
+                                             (sra->component_size + after)/2);
+                       }
+                       err = sysfs_set_num(sra, sd, "new_offset",
+                                           new_data_offset);
+               }
+               if (err < 0) {
                        if (errno == E2BIG && data_offset != INVALID_SECTORS) {
                                pr_err("data-offset is too big for %s\n",
                                       dn);
@@ -2310,6 +2401,9 @@ static int set_new_data_offset(struct mdinfo *sra, struct supertype *st,
        return err;
 release:
        return -1;
+fallback:
+       /* Just use a backup file */
+       return 1;
 }
 
 static int raid10_reshape(char *container, int fd, char *devname,
@@ -2366,7 +2460,7 @@ static int raid10_reshape(char *container, int fd, char *devname,
                }
        }
        err = set_new_data_offset(sra, st, devname, info->delta_disks, data_offset,
-                                 min);
+                                 min, 0);
        if (err == 1) {
                pr_err("Cannot set new_data_offset: RAID10 reshape not\n");
                cont_err("supported on this kernel\n");
@@ -2549,6 +2643,102 @@ release:
        return -1;
 }
 
+static int impose_level(int fd, int level, char *devname, int verbose)
+{ /* Set the level of the array open on 'fd'; returns 0, or -1 / an errno value on failure */
+       char *c; /* name for 'level' as looked up in pers, or NULL */
+       struct mdu_array_info_s array;
+       struct mdinfo info;
+       sysfs_init(&info, fd, NULL);
+
+       ioctl(fd, GET_ARRAY_INFO, &array); /* NOTE(review): result unchecked; 'array' has no initialiser if this fails */
+       if (level == 0 &&
+           (array.level >= 4 && array.level <= 6)) {
+               /* To convert to RAID0 we need to fail and
+                * remove any non-data devices. */
+               int found = 0;
+               int d;
+               int data_disks = array.raid_disks - 1; /* active devices with raid_disk below this are kept */
+               if (array.level == 6)
+                       data_disks -= 1;
+               if (array.level == 5 &&
+                   array.layout != ALGORITHM_PARITY_N)
+                       return -1; /* only the PARITY_N layout is accepted for level 5 */
+               if (array.level == 6 &&
+                   array.layout != ALGORITHM_PARITY_N_6)
+                       return -1; /* only the PARITY_N_6 layout is accepted for level 6 */
+               sysfs_set_str(&info, NULL,"sync_action", "idle");
+               /* First remove any spares so no recovery starts */
+               for (d = 0, found = 0;
+                    d < MAX_DISKS && found < array.nr_disks;
+                    d++) {
+                       mdu_disk_info_t disk;
+                       disk.number = d;
+                       if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+                               continue;
+                       if (disk.major == 0 && disk.minor == 0)
+                               continue; /* presumably an unused slot - not counted in 'found' */
+                       found++;
+                       if ((disk.state & (1 << MD_DISK_ACTIVE))
+                           && disk.raid_disk < data_disks)
+                               /* keep this */
+                               continue;
+                       ioctl(fd, HOT_REMOVE_DISK,
+                             makedev(disk.major, disk.minor)); /* result ignored in this pass */
+               }
+               /* Now fail anything left */
+               ioctl(fd, GET_ARRAY_INFO, &array); /* re-read: nr_disks bounds the loop below */
+               for (d = 0, found = 0;
+                    d < MAX_DISKS && found < array.nr_disks;
+                    d++) {
+                       int cnt;
+                       mdu_disk_info_t disk;
+                       disk.number = d;
+                       if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+                               continue;
+                       if (disk.major == 0 && disk.minor == 0)
+                               continue;
+                       found++;
+                       if ((disk.state & (1 << MD_DISK_ACTIVE))
+                           && disk.raid_disk < data_disks)
+                               /* keep this */
+                               continue;
+                       ioctl(fd, SET_DISK_FAULTY,
+                             makedev(disk.major, disk.minor));
+                       cnt = 5; /* retries allowed while the remove reports EBUSY */
+                       while (ioctl(fd, HOT_REMOVE_DISK,
+                                    makedev(disk.major, disk.minor)) < 0
+                              && errno == EBUSY
+                              && cnt--) {
+                               usleep(10000); /* 10ms pause before the next attempt */
+                       }
+               }
+       }
+       c = map_num(pers, level);
+       if (c) { /* when no name is found nothing is written and 0 is returned */
+               int err = sysfs_set_str(&info, NULL, "level", c);
+               if (err) {
+                       err = errno; /* keep errno from the failed write for the check and return below */
+                       pr_err("%s: could not set level to %s\n",
+                               devname, c);
+                       if (err == EBUSY &&
+                           (array.state & (1<<MD_SB_BITMAP_PRESENT)))
+                               cont_err("Bitmap must be removed"
+                                        " before level can be changed\n");
+                       return err;
+               }
+               if (verbose >= 0)
+                       pr_err("level of %s changed to %s\n",
+                               devname, c);
+       }
+       return 0;
+}
+
+int sigterm = 0; /* flag: set to 1 by catch_term() */
+static void catch_term(int sig) /* signal handler; 'sig' itself is not inspected */
+{
+       sigterm = 1; /* NOTE(review): plain int written from a handler; volatile sig_atomic_t is the portable type - confirm against other declarations */
+}
+
 static int reshape_array(char *container, int fd, char *devname,
                         struct supertype *st, struct mdinfo *info,
                         int force, struct mddev_dev *devlist,
@@ -2600,7 +2790,7 @@ static int reshape_array(char *container, int fd, char *devname,
                info->new_level = UnSet;
                if (info->delta_disks > 0)
                        info->array.raid_disks -= info->delta_disks;
-               msg = analyse_change(info, &reshape);
+               msg = analyse_change(devname, info, &reshape);
                info->new_level = new_level;
                if (info->delta_disks > 0)
                        info->array.raid_disks += info->delta_disks;
@@ -2608,9 +2798,11 @@ static int reshape_array(char *container, int fd, char *devname,
                        /* Make sure the array isn't read-only */
                        ioctl(fd, RESTART_ARRAY_RW, 0);
        } else
-               msg = analyse_change(info, &reshape);
+               msg = analyse_change(devname, info, &reshape);
        if (msg) {
-               pr_err("%s\n", msg);
+               /* if msg == "", error has already been printed */
+               if (msg[0])
+                       pr_err("%s\n", msg);
                goto release;
        }
        if (restart &&
@@ -2639,6 +2831,18 @@ static int reshape_array(char *container, int fd, char *devname,
                /* reshape already started. just skip to monitoring the reshape */
                if (reshape.backup_blocks == 0)
                        return 0;
+               if (restart & RESHAPE_NO_BACKUP)
+                       return 0;
+
+               /* Need 'sra' down at 'started:' */
+               sra = sysfs_read(fd, NULL,
+                                GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK|
+                                GET_CACHE);
+               if (!sra) {
+                       pr_err("%s: Cannot get array details from sysfs\n",
+                              devname);
+                       goto release;
+               }
                goto started;
        }
        /* The container is frozen but the array may not be.
@@ -2681,25 +2885,11 @@ static int reshape_array(char *container, int fd, char *devname,
        }
 
        if (reshape.level != array.level) {
-               char *c = map_num(pers, reshape.level);
-               int err;
-               if (c == NULL)
-                       goto release;
-
-               err = sysfs_set_str(info, NULL, "level", c);
-               if (err) {
-                       err = errno;
-                       pr_err("%s: could not set level to %s\n",
-                               devname, c);
-                       if (err == EBUSY &&
-                           (info->array.state & (1<<MD_SB_BITMAP_PRESENT)))
-                               cont_err("Bitmap must be removed"
-                                        " before level can be changed\n");
+               int err = impose_level(fd, reshape.level, devname, verbose);
+               if (err)
                        goto release;
-               }
-               if (verbose >= 0)
-                       pr_err("level of %s changed to %s\n",
-                               devname, c);
+               info->new_layout = UnSet; /* after level change,
+                                          * layout is meaningless */
                orig_level = array.level;
                sysfs_freeze_array(info);
 
@@ -2758,8 +2948,7 @@ static int reshape_array(char *container, int fd, char *devname,
                Manage_subdevs(devname, fd, devlist, verbose,
                               0,NULL, 0);
 
-
-       if (reshape.backup_blocks == 0 && data_offset)
+       if (reshape.backup_blocks == 0 && data_offset != INVALID_SECTORS)
                reshape.backup_blocks = reshape.before.data_disks * info->array.chunk_size/512;
        if (reshape.backup_blocks == 0) {
                /* No restriping needed, but we might need to impose
@@ -2851,8 +3040,6 @@ static int reshape_array(char *container, int fd, char *devname,
                goto release;
        }
 
-started:
-
        if (array.level == 10) {
                /* Reshaping RAID10 does not require any data backup by
                 * user-space.  Instead it requires that the data_offset
@@ -2876,9 +3063,10 @@ started:
        }
 
        if (!backup_file)
-               switch(set_new_data_offset(sra, st, devname, info->delta_disks,
+               switch(set_new_data_offset(sra, st, devname,
+                                          reshape.after.data_disks - reshape.before.data_disks,
                                           data_offset,
-                                          reshape.min_offset_change)) {
+                                          reshape.min_offset_change, 1)) {
        case -1:
                goto release;
        case 0:
@@ -2899,7 +3087,23 @@ started:
                        pr_err("Failed to initiate reshape!\n");
                        goto release;
                }
-
+               if (info->new_level == reshape.level)
+                       return 0;
+               /* need to adjust level when reshape completes */
+               switch(fork()) {
+               case -1: /* ignore error, but don't wait */
+                       return 0;
+               default: /* parent */
+                       return 0;
+               case 0:
+                       map_fork();
+                       break;
+               }
+               close(fd);
+               wait_reshape(sra);
+               fd = open_dev(sra->sys_name);
+               if (fd >= 0)
+                       impose_level(fd, info->new_level, devname, verbose);
                return 0;
        case 1: /* Couldn't set data_offset, try the old way */
                if (data_offset != INVALID_SECTORS) {
@@ -2909,6 +3113,7 @@ started:
                break;
        }
 
+started:
        /* Decide how many blocks (sectors) for a reshape
         * unit.  The number we have so far is just a minimum
         */
@@ -2953,8 +3158,9 @@ started:
                if (backup_file == NULL) {
                        if (reshape.after.data_disks <=
                            reshape.before.data_disks) {
-                               pr_err("%s: Cannot grow - "
-                                       "need backup-file\n", devname);
+                               pr_err("%s: Cannot grow - need backup-file\n",
+                                      devname);
+                               pr_err(" Please provide one with \"--backup=...\"\n");
                                goto release;
                        } else if (sra->array.spare_disks == 0) {
                                pr_err("%s: Cannot grow - "
@@ -3037,7 +3243,7 @@ started:
        do {
                struct mdstat_ent *mds, *m;
                delayed = 0;
-               mds = mdstat_read(0, 0);
+               mds = mdstat_read(1, 0);
                for (m = mds; m; m = m->next)
                        if (strcmp(m->devnm, sra->sys_name) == 0) {
                                if (m->resync &&
@@ -3057,9 +3263,9 @@ started:
                        delayed = 0;
                }
                if (delayed)
-                       sleep(30 - (delayed-1) * 25);
+                       mdstat_wait(30 - (delayed-1) * 25);
        } while (delayed);
-
+       mdstat_close();
        close(fd);
        if (check_env("MDADM_GROW_VERIFY"))
                fd = open(devname, O_RDONLY | O_DIRECT);
@@ -3067,6 +3273,8 @@ started:
                fd = -1;
        mlockall(MCL_FUTURE);
 
+       signal(SIGTERM, catch_term);
+
        if (st->ss->external) {
                /* metadata handler takes it from here */
                done = st->ss->manage_reshape(
@@ -3124,14 +3332,10 @@ started:
                set_array_size(st, info, info->text_version);
 
        if (info->new_level != reshape.level) {
-
-               c = map_num(pers, info->new_level);
-               if (c) {
-                       err = sysfs_set_str(sra, NULL, "level", c);
-                       if (err)
-                               pr_err("%s: could not set level "
-                                      "to %s\n", devname, c);
-               }
+               if (fd < 0)
+                       fd = open(devname, O_RDONLY);
+               impose_level(fd, info->new_level, devname, verbose);
+               close(fd);
                if (info->new_level == 0)
                        st->update_tail = NULL;
        }
@@ -3288,7 +3492,7 @@ int reshape_container(char *container, char *devname,
                        flush_mdmon(container);
 
                rv = reshape_array(container, fd, adev, st,
-                                  content, force, NULL, 0ULL,
+                                  content, force, NULL, INVALID_SECTORS,
                                   backup_file, verbose, 1, restart,
                                   freeze_reshape);
                close(fd);
@@ -3339,7 +3543,7 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
                     unsigned long long backup_point,
                     unsigned long long wait_point,
                     unsigned long long *suspend_point,
-                    unsigned long long *reshape_completed)
+                    unsigned long long *reshape_completed, int *frozen)
 {
        /* This function is called repeatedly by the reshape manager.
         * It determines how much progress can safely be made and allows
@@ -3556,7 +3760,8 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
                wait_point = info->component_size - wait_point;
        }
 
-       sysfs_set_num(info, NULL, "sync_max", max_progress);
+       if (!*frozen)
+               sysfs_set_num(info, NULL, "sync_max", max_progress);
 
        /* Now wait.  If we have already reached the point that we were
         * asked to wait to, don't wait at all, else wait for any change.
@@ -3576,7 +3781,6 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
                 * waiting forever on a dead array
                 */
                char action[20];
-               fd_set rfds;
                if (sysfs_get_str(info, NULL, "sync_action",
                                  action, 20) <= 0 ||
                    strncmp(action, "reshape", 7) != 0)
@@ -3592,9 +3796,7 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
                    && info->reshape_progress < (info->component_size
                                                 * reshape->after.data_disks))
                        break;
-               FD_ZERO(&rfds);
-               FD_SET(fd, &rfds);
-               select(fd+1, NULL, NULL, &rfds, NULL);
+               sysfs_wait(fd, NULL);
                if (sysfs_fd_get_ll(fd, &completed) < 0)
                        goto check_progress;
        }
@@ -3639,23 +3841,24 @@ check_progress:
                /* The abort might only be temporary.  Wait up to 10
                 * seconds for fd to contain a valid number again.
                 */
-               struct timeval tv;
+               int wait = 10000;
                int rv = -2;
-               tv.tv_sec = 10;
-               tv.tv_usec = 0;
-               while (fd >= 0 && rv < 0 && tv.tv_sec > 0) {
-                       fd_set rfds;
-                       FD_ZERO(&rfds);
-                       FD_SET(fd, &rfds);
-                       if (select(fd+1, NULL, NULL, &rfds, &tv) != 1)
+               unsigned long long new_sync_max;
+               while (fd >= 0 && rv < 0 && wait > 0) {
+                       if (sysfs_wait(fd, &wait) != 1)
                                break;
                        switch (sysfs_fd_get_ll(fd, &completed)) {
                        case 0:
                                /* all good again */
                                rv = 1;
+                               /* If "sync_max" is no longer max_progress
+                                * we need to freeze things
+                                */
+                               sysfs_get_ll(info, NULL, "sync_max", &new_sync_max);
+                               *frozen = (new_sync_max != max_progress);
                                break;
                        case -2: /* read error - abort */
-                               tv.tv_sec = 0;
+                               wait = 0;
                                break;
                        }
                }
@@ -3947,6 +4150,7 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
        struct mdinfo *sd;
        unsigned long stripes;
        int uuid[4];
+       int frozen = 0;
 
        /* set up the backup-super-block.  This requires the
         * uuid from the array.
@@ -4024,9 +4228,11 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
                                wait_point = __le64_to_cpu(bsb.arraystart2);
                }
 
+               reshape_completed = sra->reshape_progress;
                rv = progress_reshape(sra, reshape,
                                      backup_point, wait_point,
-                                     &suspend_point, &reshape_completed);
+                                     &suspend_point, &reshape_completed,
+                                     &frozen);
                /* external metadata would need to ping_monitor here */
                sra->reshape_progress = reshape_completed;
 
@@ -4052,7 +4258,8 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
                                forget_backup(dests, destfd,
                                              destoffsets, 1);
                }
-
+               if (sigterm)
+                       rv = -2;
                if (rv < 0) {
                        if (rv == -1)
                                done = 1;
@@ -4060,6 +4267,7 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
                }
                if (rv == 0 && increasing && !st->ss->external) {
                        /* No longer need to monitor this reshape */
+                       sysfs_set_str(sra, NULL, "sync_max", "max");
                        done = 1;
                        break;
                }
@@ -4113,7 +4321,12 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
        }
 
        /* FIXME maybe call progress_reshape one more time instead */
-       abort_reshape(sra); /* remove any remaining suspension */
+       /* remove any remaining suspension */
+       sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
+       sysfs_set_num(sra, NULL, "suspend_hi", 0);
+       sysfs_set_num(sra, NULL, "suspend_lo", 0);
+       sysfs_set_num(sra, NULL, "sync_min", 0);
+
        if (reshape->before.data_disks == reshape->after.data_disks)
                sysfs_set_num(sra, NULL, "sync_speed_min", speed);
        free(buf);
@@ -4494,6 +4707,8 @@ int Grow_continue_command(char *devname, int fd,
                                continue;
                        err = st->ss->load_super(st, fd2, NULL);
                        close(fd2);
+                       /* invalidate fd2 to avoid possible double close() */
+                       fd2 = -1;
                        if (err)
                                continue;
                        break;
@@ -4652,10 +4867,14 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
                close(cfd);
                ret_val = reshape_container(st->container_devnm, NULL, mdfd,
                                            st, info, 0, backup_file,
-                                           0, 1, freeze_reshape);
+                                           0,
+                                           1 | info->reshape_active,
+                                           freeze_reshape);
        } else
                ret_val = reshape_array(NULL, mdfd, "array", st, info, 1,
-                                       NULL, 0ULL, backup_file, 0, 0, 1,
+                                       NULL, INVALID_SECTORS,
+                                       backup_file, 0, 0,
+                                       1 | info->reshape_active,
                                        freeze_reshape);
 
        return ret_val;