git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Grow.c
Incremental: Remove redundant call for GET_ARRAY_INFO
[thirdparty/mdadm.git] / Grow.c
diff --git a/Grow.c b/Grow.c
old mode 100644 (file)
new mode 100755 (executable)
index f6a989d..4eab5cc
--- a/Grow.c
+++ b/Grow.c
@@ -24,6 +24,7 @@
 #include       "mdadm.h"
 #include       "dlink.h"
 #include       <sys/mman.h>
+#include       <stddef.h>
 #include       <stdint.h>
 #include       <signal.h>
 #include       <sys/wait.h>
 #include       "md_u.h"
 #include       "md_p.h"
 
-#ifndef offsetof
-#define offsetof(t,f) ((size_t)&(((t*)0)->f))
-#endif
-
 int restore_backup(struct supertype *st,
                   struct mdinfo *content,
                   int working_disks,
@@ -90,11 +87,9 @@ int restore_backup(struct supertype *st,
        }
        free(fdlist);
        if (err) {
-               pr_err("Failed to restore critical"
-                      " section for reshape - sorry.\n");
+               pr_err("Failed to restore critical section for reshape - sorry.\n");
                if (!backup_file)
-                       pr_err("Possibly you need"
-                               " to specify a --backup-file\n");
+                       pr_err("Possibly you need to specify a --backup-file\n");
                return 1;
        }
 
@@ -120,7 +115,7 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
        struct supertype *st = NULL;
        char *subarray = NULL;
 
-       if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
+       if (md_get_array_info(fd, &info.array) < 0) {
                pr_err("cannot get array info for %s\n", devname);
                return 1;
        }
@@ -226,7 +221,7 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
         * Now go through and update all superblocks
         */
 
-       if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
+       if (md_get_array_info(fd, &info.array) < 0) {
                pr_err("cannot get array info for %s\n", devname);
                return 1;
        }
@@ -295,15 +290,24 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
        int major = BITMAP_MAJOR_HI;
        int vers = md_get_version(fd);
        unsigned long long bitmapsize, array_size;
+       struct mdinfo *mdi;
 
        if (vers < 9003) {
                major = BITMAP_MAJOR_HOSTENDIAN;
-               pr_err("Warning - bitmaps created on this kernel"
-                       " are not portable\n"
-                       "  between different architectures.  Consider upgrading"
-                       " the Linux kernel.\n");
+               pr_err("Warning - bitmaps created on this kernel are not portable\n"
+                       "  between different architectures.  Consider upgrading the Linux kernel.\n");
        }
 
+       /*
+        * We only ever get called if s->bitmap_file is != NULL, so this check
+        * is just here to quiet down static code checkers.
+        */
+       if (!s->bitmap_file)
+               return 1;
+
+       if (strcmp(s->bitmap_file, "clustered") == 0)
+               major = BITMAP_MAJOR_CLUSTERED;
+
        if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) {
                if (errno == ENOMEM)
                        pr_err("Memory allocation failure.\n");
@@ -312,8 +316,8 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
                return 1;
        }
        if (bmf.pathname[0]) {
-               if (strcmp(s->bitmap_file,"none")==0) {
-                       if (ioctl(fd, SET_BITMAP_FILE, -1)!= 0) {
+               if (strcmp(s->bitmap_file,"none") == 0) {
+                       if (ioctl(fd, SET_BITMAP_FILE, -1) != 0) {
                                pr_err("failed to remove bitmap %s\n",
                                        bmf.pathname);
                                return 1;
@@ -324,21 +328,23 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
                        devname, bmf.pathname);
                return 1;
        }
-       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
+       if (md_get_array_info(fd, &array) != 0) {
                pr_err("cannot get array status for %s\n", devname);
                return 1;
        }
-       if (array.state & (1<<MD_SB_BITMAP_PRESENT)) {
+       if (array.state & (1 << MD_SB_BITMAP_PRESENT)) {
                if (strcmp(s->bitmap_file, "none")==0) {
-                       array.state &= ~(1<<MD_SB_BITMAP_PRESENT);
-                       if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
-                               pr_err("failed to remove internal bitmap.\n");
+                       array.state &= ~(1 << MD_SB_BITMAP_PRESENT);
+                       if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+                               if (array.state & (1 << MD_SB_CLUSTERED))
+                                       pr_err("failed to remove clustered bitmap.\n");
+                               else
+                                       pr_err("failed to remove internal bitmap.\n");
                                return 1;
                        }
                        return 0;
                }
-               pr_err("Internal bitmap already present on %s\n",
-                       devname);
+               pr_err("bitmap already present on %s\n", devname);
                return 1;
        }
 
@@ -354,7 +360,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
        bitmapsize = array.size;
        bitmapsize <<= 1;
        if (get_dev_size(fd, NULL, &array_size) &&
-           array_size > (0x7fffffffULL<<9)) {
+           array_size > (0x7fffffffULL << 9)) {
                /* Array is big enough that we cannot trust array.size
                 * try other approaches
                 */
@@ -366,7 +372,9 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
        }
 
        if (array.level == 10) {
-               int ncopies = (array.layout&255)*((array.layout>>8)&255);
+               int ncopies;
+
+               ncopies = (array.layout & 255) * ((array.layout >> 8) & 255);
                bitmapsize = bitmapsize * array.raid_disks / ncopies;
        }
 
@@ -382,66 +390,84 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
                free(st);
                return 1;
        }
-       if (strcmp(s->bitmap_file, "internal") == 0) {
+
+       mdi = sysfs_read(fd, NULL, GET_CONSISTENCY_POLICY);
+       if (mdi) {
+               if (mdi->consistency_policy == CONSISTENCY_POLICY_PPL) {
+                       pr_err("Cannot add bitmap to array with PPL\n");
+                       free(mdi);
+                       free(st);
+                       return 1;
+               }
+               free(mdi);
+       }
+
+       if (strcmp(s->bitmap_file, "internal") == 0 ||
+           strcmp(s->bitmap_file, "clustered") == 0) {
                int rv;
                int d;
                int offset_setable = 0;
-               struct mdinfo *mdi;
                if (st->ss->add_internal_bitmap == NULL) {
-                       pr_err("Internal bitmaps not supported "
-                               "with %s metadata\n", st->ss->name);
+                       pr_err("Internal bitmaps not supported with %s metadata\n", st->ss->name);
                        return 1;
                }
+               st->nodes = c->nodes;
+               st->cluster_name = c->homecluster;
                mdi = sysfs_read(fd, NULL, GET_BITMAP_LOCATION);
                if (mdi)
                        offset_setable = 1;
-               for (d=0; d< st->max_devs; d++) {
+               for (d = 0; d < st->max_devs; d++) {
                        mdu_disk_info_t disk;
                        char *dv;
+                       int fd2;
+
                        disk.number = d;
                        if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
                                continue;
-                       if (disk.major == 0 &&
-                           disk.minor == 0)
+                       if (disk.major == 0 && disk.minor == 0)
                                continue;
-                       if ((disk.state & (1<<MD_DISK_SYNC))==0)
+                       if ((disk.state & (1 << MD_DISK_SYNC)) == 0)
                                continue;
                        dv = map_dev(disk.major, disk.minor, 1);
-                       if (dv) {
-                               int fd2 = dev_open(dv, O_RDWR);
-                               if (fd2 < 0)
-                                       continue;
-                               if (st->ss->load_super(st, fd2, NULL)==0) {
-                                       if (st->ss->add_internal_bitmap(
-                                                   st,
-                                                   &s->bitmap_chunk, c->delay, s->write_behind,
-                                                   bitmapsize, offset_setable,
-                                                   major)
-                                               )
-                                               st->ss->write_bitmap(st, fd2);
-                                       else {
-                                               pr_err("failed to create internal bitmap"
-                                                      " - chunksize problem.\n");
-                                               close(fd2);
-                                               return 1;
-                                       }
+                       if (!dv)
+                               continue;
+                       fd2 = dev_open(dv, O_RDWR);
+                       if (fd2 < 0)
+                               continue;
+                       rv = st->ss->load_super(st, fd2, NULL);
+                       if (!rv) {
+                               rv = st->ss->add_internal_bitmap(
+                                       st, &s->bitmap_chunk, c->delay,
+                                       s->write_behind, bitmapsize,
+                                       offset_setable, major);
+                               if (!rv) {
+                                       st->ss->write_bitmap(st, fd2,
+                                                            NodeNumUpdate);
+                               } else {
+                                       pr_err("failed to create internal bitmap - chunksize problem.\n");
                                }
-                               close(fd2);
+                       } else {
+                               pr_err("failed to load super-block.\n");
                        }
+                       close(fd2);
+                       if (rv)
+                               return 1;
                }
                if (offset_setable) {
                        st->ss->getinfo_super(st, mdi, NULL);
                        sysfs_init(mdi, fd, NULL);
                        rv = sysfs_set_num_signed(mdi, NULL, "bitmap/location",
                                                  mdi->bitmap_offset);
+                       free(mdi);
                } else {
-                       array.state |= (1<<MD_SB_BITMAP_PRESENT);
+                       if (strcmp(s->bitmap_file, "clustered") == 0)
+                               array.state |= (1 << MD_SB_CLUSTERED);
+                       array.state |= (1 << MD_SB_BITMAP_PRESENT);
                        rv = ioctl(fd, SET_ARRAY_INFO, &array);
                }
                if (rv < 0) {
                        if (errno == EBUSY)
-                               pr_err("Cannot add bitmap while array is"
-                                      " resyncing or reshaping etc.\n");
+                               pr_err("Cannot add bitmap while array is resyncing or reshaping etc.\n");
                        pr_err("failed to set internal bitmap.\n");
                        return 1;
                }
@@ -459,8 +485,8 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
                        disk.number = d;
                        if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
                                continue;
-                       if ((disk.major==0 && disk.minor==0) ||
-                           (disk.state & (1<<MD_DISK_REMOVED)))
+                       if ((disk.major==0 && disk.minor == 0) ||
+                           (disk.state & (1 << MD_DISK_REMOVED)))
                                continue;
                        dv = map_dev(disk.major, disk.minor, 1);
                        if (!dv)
@@ -479,21 +505,20 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
                        pr_err("cannot find UUID for array!\n");
                        return 1;
                }
-               if (CreateBitmap(s->bitmap_file, c->force, (char*)uuid, s->bitmap_chunk,
-                                c->delay, s->write_behind, bitmapsize, major)) {
+               if (CreateBitmap(s->bitmap_file, c->force, (char*)uuid,
+                                s->bitmap_chunk, c->delay, s->write_behind,
+                                bitmapsize, major)) {
                        return 1;
                }
                bitmap_fd = open(s->bitmap_file, O_RDWR);
                if (bitmap_fd < 0) {
-                       pr_err("weird: %s cannot be opened\n",
-                               s->bitmap_file);
+                       pr_err("weird: %s cannot be opened\n", s->bitmap_file);
                        return 1;
                }
                if (ioctl(fd, SET_BITMAP_FILE, bitmap_fd) < 0) {
                        int err = errno;
                        if (errno == EBUSY)
-                               pr_err("Cannot add bitmap while array is"
-                                      " resyncing or reshaping etc.\n");
+                               pr_err("Cannot add bitmap while array is resyncing or reshaping etc.\n");
                        pr_err("Cannot set bitmap file for %s: %s\n",
                                devname, strerror(err));
                        return 1;
@@ -503,6 +528,178 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
        return 0;
 }
 
+int Grow_consistency_policy(char *devname, int fd, struct context *c, struct shape *s)
+{      /* Switch the array open on fd to the policy requested in s->consistency_policy. */
+       struct supertype *st;
+       struct mdinfo *sra;     /* sysfs view of the array: policy, level, devices, state */
+       struct mdinfo *sd;
+       char *subarray = NULL;  /* filled in through super_by_fd(); non-NULL selects the container paths below */
+       int ret = 0;
+       char container_dev[PATH_MAX];
+       /* Returns 0 on success and non-zero on any failure; only resync and PPL targets are accepted. */
+       if (s->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
+           s->consistency_policy != CONSISTENCY_POLICY_PPL) {
+               pr_err("Operation not supported for consistency policy %s\n",
+                      map_num(consistency_policies, s->consistency_policy));
+               return 1;
+       }
+
+       st = super_by_fd(fd, &subarray);
+       if (!st)
+               return 1;
+
+       sra = sysfs_read(fd, NULL, GET_CONSISTENCY_POLICY|GET_LEVEL|
+                                  GET_DEVS|GET_STATE);
+       if (!sra) {
+               ret = 1;
+               goto free_st;
+       }
+       /* Enabling PPL needs a metadata handler that implements write_init_ppl. */
+       if (s->consistency_policy == CONSISTENCY_POLICY_PPL &&
+           !st->ss->write_init_ppl) {
+               pr_err("%s metadata does not support PPL\n", st->ss->name);
+               ret = 1;
+               goto free_info;
+       }
+
+       if (sra->array.level != 5) {
+               pr_err("Operation not supported for array level %d\n",
+                               sra->array.level);
+               ret = 1;
+               goto free_info;
+       }
+       /* Refuse a no-op change, and refuse to leave any policy other than resync or PPL. */
+       if (sra->consistency_policy == (unsigned)s->consistency_policy) {
+               pr_err("Consistency policy is already %s\n",
+                      map_num(consistency_policies, s->consistency_policy));
+               ret = 1;
+               goto free_info;
+       } else if (sra->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
+                  sra->consistency_policy != CONSISTENCY_POLICY_PPL) {
+               pr_err("Current consistency policy is %s, cannot change to %s\n",
+                      map_num(consistency_policies, sra->consistency_policy),
+                      map_num(consistency_policies, s->consistency_policy));
+               ret = 1;
+               goto free_info;
+       }
+       /* With a subarray name set, apply the change via Update_subarray() on the container device first. */
+       if (subarray) {
+               char *update;
+
+               if (s->consistency_policy == CONSISTENCY_POLICY_PPL)
+                       update = "ppl";
+               else
+                       update = "no-ppl";
+
+               sprintf(container_dev, "/dev/%s", st->container_devnm);
+
+               ret = Update_subarray(container_dev, subarray, update, NULL,
+                                     c->verbose);
+               if (ret)
+                       goto free_info;
+       }
+       /* Enabling PPL: set ppl_sector/ppl_size for each in-sync device and write its initial PPL. */
+       if (s->consistency_policy == CONSISTENCY_POLICY_PPL) {
+               struct mdinfo info;
+
+               if (subarray) {
+                       struct mdinfo *mdi;
+                       int cfd;
+
+                       cfd = open(container_dev, O_RDWR|O_EXCL);
+                       if (cfd < 0) {
+                               pr_err("Failed to open %s\n", container_dev);
+                               ret = 1;
+                               goto free_info;
+                       }
+                       /* Load container metadata through cfd; the fd is closed right after, whatever the result. */
+                       ret = st->ss->load_container(st, cfd, st->container_devnm);
+                       close(cfd);
+
+                       if (ret) {
+                               pr_err("Cannot read superblock for %s\n",
+                                      container_dev);
+                               goto free_info;
+                       }
+                       /* NOTE(review): mdi is dereferenced without a NULL check - confirm container_content() cannot fail here. */
+                       mdi = st->ss->container_content(st, subarray);
+                       info = *mdi;
+                       free(mdi);
+               }
+
+               for (sd = sra->devs; sd; sd = sd->next) {
+                       int dfd;
+                       char *devpath;
+
+                       if ((sd->disk.state & (1 << MD_DISK_SYNC)) == 0)
+                               continue;
+                       /* NOTE(review): devpath is not checked for NULL before dev_open()/pr_err() - confirm map_dev() cannot fail here. */
+                       devpath = map_dev(sd->disk.major, sd->disk.minor, 0);
+                       dfd = dev_open(devpath, O_RDWR);
+                       if (dfd < 0) {
+                               pr_err("Failed to open %s\n", devpath);
+                               ret = 1;
+                               goto free_info;
+                       }
+                       /* No subarray: apply the "ppl" update to this device's loaded superblock and refresh info from it. */
+                       if (!subarray) {
+                               ret = st->ss->load_super(st, dfd, NULL);
+                               if (ret) {
+                                       pr_err("Failed to load super-block.\n");
+                                       close(dfd);
+                                       goto free_info;
+                               }
+
+                               ret = st->ss->update_super(st, sra, "ppl", devname,
+                                                          c->verbose, 0, NULL);
+                               if (ret) {
+                                       close(dfd);
+                                       st->ss->free_super(st);
+                                       goto free_info;
+                               }
+                               st->ss->getinfo_super(st, &info, NULL);
+                       }
+
+                       ret |= sysfs_set_num(sra, sd, "ppl_sector", info.ppl_sector);
+                       ret |= sysfs_set_num(sra, sd, "ppl_size", info.ppl_size);
+                       /* NOTE(review): this error path calls free_super() unconditionally, unlike the !subarray guard after write_init_ppl(). */
+                       if (ret) {
+                               pr_err("Failed to set PPL attributes for %s\n",
+                                      sd->sys_name);
+                               close(dfd);
+                               st->ss->free_super(st);
+                               goto free_info;
+                       }
+
+                       ret = st->ss->write_init_ppl(st, &info, dfd);
+                       if (ret)
+                               pr_err("Failed to write PPL\n");
+
+                       close(dfd);
+
+                       if (!subarray)
+                               st->ss->free_super(st);
+
+                       if (ret)
+                               goto free_info;
+               }
+       }
+       /* Finally write the policy name to the array's "consistency_policy" attribute via sysfs_set_str(). */
+       ret = sysfs_set_str(sra, NULL, "consistency_policy",
+                           map_num(consistency_policies,
+                                   s->consistency_policy));
+       if (ret)
+               pr_err("Failed to change array consistency policy\n");
+       /* Error paths jump to the label matching what has been allocated so far; free_info falls through to free_st. */
+free_info:
+       sysfs_free(sra);
+free_st:
+       free(st);
+       free(subarray);
+
+       return ret;
+}
+
 /*
  * When reshaping an array we might need to backup some data.
  * This is written to all spares with a 'super_block' describing it.
@@ -623,10 +820,8 @@ static void unfreeze(struct supertype *st)
 
                if (sra &&
                    sysfs_get_str(sra, NULL, "sync_action", buf, 20) > 0
-                   && strcmp(buf, "frozen\n") == 0) {
-                       printf("unfreeze\n");
+                   && strcmp(buf, "frozen\n") == 0)
                        sysfs_set_str(sra, NULL, "sync_action", "idle");
-               }
                sysfs_free(sra);
        }
 }
@@ -729,15 +924,28 @@ int start_reshape(struct mdinfo *sra, int already_running,
        if (!already_running)
                sysfs_set_num(sra, NULL, "sync_min", sync_max_to_set);
        err = err ?: sysfs_set_num(sra, NULL, "sync_max", sync_max_to_set);
-       if (!already_running)
-               err = err ?: sysfs_set_str(sra, NULL, "sync_action", "reshape");
-
+       if (!already_running && err == 0) {
+               int cnt = 5;
+               do {
+                       err = sysfs_set_str(sra, NULL, "sync_action", "reshape");
+                       if (err)
+                               sleep(1);
+               } while (err && errno == EBUSY && cnt-- > 0);
+       }
        return err;
 }
 
 void abort_reshape(struct mdinfo *sra)
 {
        sysfs_set_str(sra, NULL, "sync_action", "idle");
+       /*
+        * Prior to kernel commit: 23ddff3792f6 ("md: allow suspend_lo and
+        * suspend_hi to decrease as well as increase.")
+        * you could only increase suspend_{lo,hi} unless the region they
+        * covered was empty.  So to reset to 0, you need to push suspend_lo
+        * up past suspend_hi first.  So to maximize the chance of mdadm
+        * working on all kernels, we want to keep doing that.
+        */
        sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
        sysfs_set_num(sra, NULL, "suspend_hi", 0);
        sysfs_set_num(sra, NULL, "suspend_lo", 0);
@@ -754,6 +962,26 @@ int remove_disks_for_takeover(struct supertype *st,
        struct mdinfo *remaining;
        int slot;
 
+       if (st->ss->external) {
+               int rv = 0;
+               struct mdinfo *arrays = st->ss->container_content(st, NULL);
+               /*
+                * container_content returns list of arrays in container
+                * If arrays->next is not NULL it means that there are
+                * 2 arrays in container and operation should be blocked
+                */
+               if (arrays) {
+                       if (arrays->next)
+                               rv = 1;
+                       sysfs_free(arrays);
+                       if (rv) {
+                               pr_err("Error. Cannot perform operation on /dev/%s\n", st->devnm);
+                               pr_err("For this operation it MUST be single array in container\n");
+                               return rv;
+                       }
+               }
+       }
+
        if (sra->array.level == 10)
                nr_of_copies = layout & 0xff;
        else if (sra->array.level == 1)
@@ -856,7 +1084,8 @@ int reshape_prepare_fdlist(char *devname,
        for (sd = sra->devs; sd; sd = sd->next) {
                if (sd->disk.state & (1<<MD_DISK_FAULTY))
                        continue;
-               if (sd->disk.state & (1<<MD_DISK_SYNC)) {
+               if (sd->disk.state & (1<<MD_DISK_SYNC) &&
+                   sd->disk.raid_disk < raid_disks) {
                        char *dn = map_dev(sd->disk.major,
                                           sd->disk.minor, 1);
                        fdlist[sd->disk.raid_disk]
@@ -920,8 +1149,7 @@ int reshape_open_backup_file(char *backup_file,
        dev = stb.st_dev;
        fstat(fd, &stb);
        if (stb.st_rdev == dev) {
-               pr_err("backup file must NOT be"
-                       " on the array being reshaped.\n");
+               pr_err("backup file must NOT be on the array being reshaped.\n");
                close(*fdlist);
                return 0;
        }
@@ -929,8 +1157,7 @@ int reshape_open_backup_file(char *backup_file,
        memset(buf, 0, 512);
        for (i=0; i < blocks + 8 ; i++) {
                if (write(*fdlist, buf, 512) != 512) {
-                       pr_err("%s: cannot create"
-                               " backup file %s: %s\n",
+                       pr_err("%s: cannot create backup file %s: %s\n",
                                devname, backup_file, strerror(errno));
                        return 0;
                }
@@ -943,7 +1170,9 @@ int reshape_open_backup_file(char *backup_file,
 
        if (!restart && strncmp(backup_file, MAP_DIR, strlen(MAP_DIR)) != 0) {
                char *bu = make_backup(sys_name);
-               symlink(backup_file, bu);
+               if (symlink(backup_file, bu))
+                       pr_err("Recording backup file in " MAP_DIR " failed: %s\n",
+                              strerror(errno));
                free(bu);
        }
 
@@ -1029,21 +1258,23 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
 
        switch (info->array.level) {
        default:
-               return "Cannot understand this RAID level";
+               return "No reshape is possibly for this RAID level";
+       case LEVEL_LINEAR:
+               if (info->delta_disks != UnSet)
+                       return "Only --add is supported for LINEAR, setting --raid-disks is not needed";
+               else
+                       return "Only --add is supported for LINEAR, other --grow options are not meaningful";
        case 1:
                /* RAID1 can convert to RAID1 with different disks, or
                 * raid5 with 2 disks, or
                 * raid0 with 1 disk
                 */
-               if (info->new_level > 1 &&
-                   (info->component_size & 7))
-                       return "Cannot convert RAID1 of this size - "
-                               "reduce size to multiple of 4K first.";
+               if (info->new_level > 1 && (info->component_size & 7))
+                       return "Cannot convert RAID1 of this size - reduce size to multiple of 4K first.";
                if (info->new_level == 0) {
                        if (info->delta_disks != UnSet &&
                            info->delta_disks != 0)
-                               return "Cannot change number of disks "
-                                       "with RAID1->RAID0 conversion";
+                               return "Cannot change number of disks with RAID1->RAID0 conversion";
                        re->level = 0;
                        re->before.data_disks = 1;
                        re->after.data_disks = 1;
@@ -1056,9 +1287,9 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
                        re->level = 1;
                        return NULL;
                }
-               if (info->array.raid_disks == 2 &&
-                   info->new_level == 5) {
-
+               if (info->array.raid_disks != 2 && info->new_level == 5)
+                       return "Can only convert a 2-device array to RAID5";
+               if (info->array.raid_disks == 2 && info->new_level == 5) {
                        re->level = 5;
                        re->before.data_disks = 1;
                        if (info->delta_disks != UnSet &&
@@ -1129,8 +1360,7 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
                                if (far > 1 && !offset)
                                        return "Cannot reshape RAID10 to far-mode";
                                if (near * far != copies)
-                                       return "Cannot change number of copies"
-                                               " when reshaping RAID10";
+                                       return "Cannot change number of copies when reshaping RAID10";
                        }
                        if (info->delta_disks == UnSet)
                                info->delta_disks = 0;
@@ -1170,7 +1400,8 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
        case 0:
                /* RAID0 can be converted to RAID10, or to RAID456 */
                if (info->new_level == 10) {
-                       if (info->new_layout == UnSet && info->delta_disks == UnSet) {
+                       if (info->new_layout == UnSet &&
+                           info->delta_disks == UnSet) {
                                /* Assume near=2 layout */
                                info->new_layout = 0x102;
                                info->delta_disks = info->array.raid_disks;
@@ -1180,15 +1411,13 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
                                                  / info->array.raid_disks);
                                if (info->array.raid_disks * (copies-1)
                                    != info->delta_disks)
-                                       return "Impossible number of devices"
-                                               " for RAID0->RAID10";
+                                       return "Impossible number of devices for RAID0->RAID10";
                                info->new_layout = 0x100 + copies;
                        }
                        if (info->delta_disks == UnSet) {
                                int copies = info->new_layout & 0xff;
                                if (info->new_layout != 0x100 + copies)
-                                       return "New layout impossible"
-                                               " for RAID0->RAID10";;
+                                       return "New layout impossible for RAID0->RAID10";;
                                info->delta_disks = (copies - 1) *
                                        info->array.raid_disks;
                        }
@@ -1283,8 +1512,7 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
                                return "Can only convert a 2-device array to RAID1";
                        if (info->delta_disks != UnSet &&
                            info->delta_disks != 0)
-                               return "Cannot set raid_disk when "
-                                       "converting RAID5->RAID1";
+                               return "Cannot set raid_disk when converting RAID5->RAID1";
                        re->level = 1;
                        info->new_chunk = 0;
                        return NULL;
@@ -1341,7 +1569,6 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
 
                switch (re->level) {
                case 4:
-                       re->before.layout = 0;
                        re->after.layout = 0;
                        break;
                case 5:
@@ -1380,8 +1607,7 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
                                        strcat(strcpy(layout, ls), "-6");
                                        l = map_name(r6layout, layout);
                                        if (l == UnSet)
-                                               return "Cannot find RAID6 layout"
-                                                       " to convert to";
+                                               return "Cannot find RAID6 layout to convert to";
                                } else {
                                        /* Current RAID6 has no equivalent.
                                         * If it is already a '-6' layout we
@@ -1414,16 +1640,19 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
        if (info->delta_disks == UnSet)
                info->delta_disks = delta_parity;
 
-       re->after.data_disks = (re->before.data_disks
-                               + info->delta_disks
-                               - delta_parity);
+       re->after.data_disks =
+               (re->before.data_disks + info->delta_disks - delta_parity);
+
        switch (re->level) {
-       case 6: re->parity = 2;
+       case 6:
+               re->parity = 2;
                break;
        case 4:
-       case 5: re->parity = 1;
+       case 5:
+               re->parity = 1;
                break;
-       default: re->parity = 0;
+       default:
+               re->parity = 0;
                break;
        }
        /* So we have a restripe operation, we need to calculate the number
@@ -1477,7 +1706,7 @@ static int set_array_size(struct supertype *st, struct mdinfo *sra,
 
        if (text_version == NULL)
                text_version = sra->text_version;
-       subarray = strchr(text_version+1, '/')+1;
+       subarray = strchr(text_version + 1, '/')+1;
        info = st->ss->container_content(st, subarray);
        if (info) {
                unsigned long long current_size = 0;
@@ -1493,8 +1722,8 @@ static int set_array_size(struct supertype *st, struct mdinfo *sra,
                                ret_val = 0;
                                dprintf("Array size changed");
                        }
-                       dprintf(" from %llu to %llu.\n",
-                               current_size, new_size);
+                       dprintf_cont(" from %llu to %llu.\n",
+                                    current_size, new_size);
                }
                sysfs_free(info);
        } else
@@ -1514,8 +1743,8 @@ static int reshape_container(char *container, char *devname,
                             struct supertype *st,
                             struct mdinfo *info,
                             int force,
-                            char *backup_file,
-                            int verbose, int restart, int freeze_reshape);
+                            char *backup_file, int verbose,
+                            int forked, int restart, int freeze_reshape);
 
 int Grow_reshape(char *devname, int fd,
                 struct mddev_dev *devlist,
@@ -1555,28 +1784,26 @@ int Grow_reshape(char *devname, int fd,
        struct mdinfo info;
        struct mdinfo *sra;
 
-       if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
+       if (md_get_array_info(fd, &array) < 0) {
                pr_err("%s is not an active md array - aborting\n",
                        devname);
                return 1;
        }
-       if (data_offset != INVALID_SECTORS && array.level != 10
-           && (array.level < 4 || array.level > 6)) {
+       if (data_offset != INVALID_SECTORS && array.level != 10 &&
+           (array.level < 4 || array.level > 6)) {
                pr_err("--grow --data-offset not yet supported\n");
                return 1;
        }
 
        if (s->size > 0 &&
            (s->chunk || s->level!= UnSet || s->layout_str || s->raiddisks)) {
-               pr_err("cannot change component size at the same time "
-                       "as other changes.\n"
-                       "   Change size first, then check data is intact before "
-                       "making other changes.\n");
+               pr_err("cannot change component size at the same time as other changes.\n"
+                       "   Change size first, then check data is intact before making other changes.\n");
                return 1;
        }
 
-       if (s->raiddisks && s->raiddisks < array.raid_disks && array.level > 1 &&
-           get_linux_version() < 2006032 &&
+       if (s->raiddisks && s->raiddisks < array.raid_disks &&
+           array.level > 1 && get_linux_version() < 2006032 &&
            !check_env("MDADM_FORCE_FEWER")) {
                pr_err("reducing the number of devices is not safe before Linux 2.6.32\n"
                        "       Please use a newer kernel\n");
@@ -1589,10 +1816,18 @@ int Grow_reshape(char *devname, int fd,
                return 1;
        }
        if (s->raiddisks > st->max_devs) {
-               pr_err("Cannot increase raid-disks on this array"
-                       " beyond %d\n", st->max_devs);
+               pr_err("Cannot increase raid-disks on this array beyond %d\n", st->max_devs);
                return 1;
        }
+       if (s->level == 0 &&
+           (array.state & (1<<MD_SB_BITMAP_PRESENT)) &&
+           !(array.state & (1<<MD_SB_CLUSTERED))) {
+                array.state &= ~(1<<MD_SB_BITMAP_PRESENT);
+                if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
+                        pr_err("failed to remove internal bitmap.\n");
+                        return 1;
+                }
+        }
 
        /* in the external case we need to check that the requested reshape is
         * supported, and perform an initial check that the container holds the
@@ -1638,15 +1873,14 @@ int Grow_reshape(char *devname, int fd,
                                /* check if reshape is allowed based on metadata
                                 * indications stored in content.array.status
                                 */
-                               if (content->array.state & (1<<MD_SB_BLOCK_VOLUME))
+                               if (content->array.state &
+                                   (1 << MD_SB_BLOCK_VOLUME))
                                        allow_reshape = 0;
-                               if (content->array.state
-                                   & (1<<MD_SB_BLOCK_CONTAINER_RESHAPE))
+                               if (content->array.state &
+                                   (1 << MD_SB_BLOCK_CONTAINER_RESHAPE))
                                        allow_reshape = 0;
                                if (!allow_reshape) {
-                                       pr_err("cannot reshape arrays in"
-                                              " container with unsupported"
-                                              " metadata: %s(%s)\n",
+                                       pr_err("cannot reshape arrays in container with unsupported metadata: %s(%s)\n",
                                               devname, container);
                                        sysfs_free(cc);
                                        free(subarray);
@@ -1663,10 +1897,9 @@ int Grow_reshape(char *devname, int fd,
        for (dv = devlist; dv; dv = dv->next)
                added_disks++;
        if (s->raiddisks > array.raid_disks &&
-           array.spare_disks +added_disks < (s->raiddisks - array.raid_disks) &&
+           array.spare_disks + added_disks < (s->raiddisks - array.raid_disks) &&
            !c->force) {
-               pr_err("Need %d spare%s to avoid degraded array,"
-                      " and only have %d.\n"
+               pr_err("Need %d spare%s to avoid degraded array, and only have %d.\n"
                       "       Use --force to over-ride this check.\n",
                       s->raiddisks - array.raid_disks,
                       s->raiddisks - array.raid_disks == 1 ? "" : "s",
@@ -1674,8 +1907,8 @@ int Grow_reshape(char *devname, int fd,
                return 1;
        }
 
-       sra = sysfs_read(fd, NULL, GET_LEVEL | GET_DISKS | GET_DEVS
-                        | GET_STATE | GET_VERSION);
+       sra = sysfs_read(fd, NULL, GET_LEVEL | GET_DISKS | GET_DEVS |
+                        GET_STATE | GET_VERSION);
        if (sra) {
                if (st->ss->external && subarray == NULL) {
                        array.level = LEVEL_CONTAINER;
@@ -1692,14 +1925,14 @@ int Grow_reshape(char *devname, int fd,
                sysfs_free(sra);
                return 1;
        } else if (frozen < 0) {
-               pr_err("%s is performing resync/recovery and cannot"
-                       " be reshaped\n", devname);
+               pr_err("%s is performing resync/recovery and cannot be reshaped\n", devname);
                sysfs_free(sra);
                return 1;
        }
 
        /* ========= set size =============== */
-       if (s->size > 0 && (s->size == MAX_SIZE || s->size != (unsigned)array.size)) {
+       if (s->size > 0 &&
+           (s->size == MAX_SIZE || s->size != (unsigned)array.size)) {
                unsigned long long orig_size = get_component_size(fd)/2;
                unsigned long long min_csize;
                struct mdinfo *mdi;
@@ -1715,7 +1948,8 @@ int Grow_reshape(char *devname, int fd,
                }
 
                if (reshape_super(st, s->size, UnSet, UnSet, 0, 0, UnSet, NULL,
-                                 devname, APPLY_METADATA_CHANGES, c->verbose > 0)) {
+                                 devname, APPLY_METADATA_CHANGES,
+                                 c->verbose > 0)) {
                        rv = 1;
                        goto release;
                }
@@ -1734,8 +1968,8 @@ int Grow_reshape(char *devname, int fd,
                                                sizeinfo->array.layout,
                                                sizeinfo->array.raid_disks);
                                new_size /= data_disks;
-                               dprintf("Metadata size correction from %llu to "
-                                       "%llu (%llu)\n", orig_size, new_size,
+                               dprintf("Metadata size correction from %llu to %llu (%llu)\n",
+                                       orig_size, new_size,
                                        new_size * data_disks);
                                s->size = new_size;
                                sysfs_free(sizeinfo);
@@ -1772,13 +2006,11 @@ int Grow_reshape(char *devname, int fd,
                        }
                }
                if (rv) {
-                       pr_err("Cannot set size on "
-                               "array members.\n");
+                       pr_err("Cannot set size on array members.\n");
                        goto size_change_error;
                }
                if (min_csize && s->size > min_csize) {
-                       pr_err("Cannot safely make this array "
-                               "use more than 2TB per device on this kernel.\n");
+                       pr_err("Cannot safely make this array use more than 2TB per device on this kernel.\n");
                        rv = 1;
                        goto size_change_error;
                }
@@ -1786,8 +2018,7 @@ int Grow_reshape(char *devname, int fd,
                        /* Don't let the kernel choose a size - it will get
                         * it wrong
                         */
-                       pr_err("Limited v0.90 array to "
-                              "2TB per device\n");
+                       pr_err("Limited v0.90 array to 2TB per device\n");
                        s->size = min_csize;
                }
                if (st->ss->external) {
@@ -1796,10 +2027,10 @@ int Grow_reshape(char *devname, int fd,
                                                   "raid5");
                                if (!rv) {
                                        raid0_takeover = 1;
-                                       /* get array parametes after takeover
-                                        * to chane one parameter at time only
+                                       /* get array parameters after takeover
+                                        * to change one parameter at time only
                                         */
-                                       rv = ioctl(fd, GET_ARRAY_INFO, &array);
+                                       rv = md_get_array_info(fd, &array);
                                }
                        }
                        /* make sure mdmon is
@@ -1815,7 +2046,7 @@ int Grow_reshape(char *devname, int fd,
                if (s->size == MAX_SIZE)
                        s->size = 0;
                array.size = s->size;
-               if ((unsigned)array.size != s->size) {
+               if (s->size & ~INT32_MAX) {
                        /* got truncated to 32bit, write to
                         * component_size instead
                         */
@@ -1841,7 +2072,7 @@ int Grow_reshape(char *devname, int fd,
                        /* go back to raid0, drop parity disk
                         */
                        sysfs_set_str(sra, NULL, "level", "raid0");
-                       ioctl(fd, GET_ARRAY_INFO, &array);
+                       md_get_array_info(fd, &array);
                }
 
 size_change_error:
@@ -1870,18 +2101,16 @@ size_change_error:
                            sysfs_set_str(sra, NULL, "resync_start", "none") < 0)
                                pr_err("--assume-clean not supported with --grow on this kernel\n");
                }
-               ioctl(fd, GET_ARRAY_INFO, &array);
+               md_get_array_info(fd, &array);
                s->size = get_component_size(fd)/2;
                if (s->size == 0)
                        s->size = array.size;
                if (c->verbose >= 0) {
                        if (s->size == orig_size)
-                               pr_err("component size of %s "
-                                       "unchanged at %lluK\n",
+                               pr_err("component size of %s unchanged at %lluK\n",
                                        devname, s->size);
                        else
-                               pr_err("component size of %s "
-                                       "has been set to %lluK\n",
+                               pr_err("component size of %s has been set to %lluK\n",
                                        devname, s->size);
                }
                changed = 1;
@@ -1916,7 +2145,7 @@ size_change_error:
                int err;
                err = remove_disks_for_takeover(st, sra, array.layout);
                if (err) {
-                       dprintf(Name": Array cannot be reshaped\n");
+                       dprintf("Array cannot be reshaped\n");
                        if (cfd > -1)
                                close(cfd);
                        rv = 1;
@@ -1949,12 +2178,9 @@ size_change_error:
                if (info.array.level == 6 &&
                    (info.new_level == 6 || info.new_level == UnSet) &&
                    info.array.layout >= 16) {
-                       pr_err("%s has a non-standard layout.  If you"
-                              " wish to preserve this\n", devname);
-                       cont_err("during the reshape, please specify"
-                                " --layout=preserve\n");
-                       cont_err("If you want to change it, specify a"
-                                " layout or use --layout=normalise\n");
+                       pr_err("%s has a non-standard layout.  If you wish to preserve this\n", devname);
+                       cont_err("during the reshape, please specify --layout=preserve\n");
+                       cont_err("If you want to change it, specify a layout or use --layout=normalise\n");
                        rv = 1;
                        goto release;
                }
@@ -1971,8 +2197,7 @@ size_change_error:
                                info.new_layout = map_name(r6layout, l);
                        }
                } else {
-                       pr_err("%s is only meaningful when reshaping"
-                              " a RAID6 array.\n", s->layout_str);
+                       pr_err("%s is only meaningful when reshaping a RAID6 array.\n", s->layout_str);
                        rv = 1;
                        goto release;
                }
@@ -1994,8 +2219,7 @@ size_change_error:
                        strcat(l, "-6");
                        info.new_layout = map_name(r6layout, l);
                } else {
-                       pr_err("%s in only meaningful when reshaping"
-                              " to RAID6\n", s->layout_str);
+                       pr_err("%s in only meaningful when reshaping to RAID6\n", s->layout_str);
                        rv = 1;
                        goto release;
                }
@@ -2017,14 +2241,12 @@ size_change_error:
                        info.new_layout = parse_layout_faulty(s->layout_str);
                        break;
                default:
-                       pr_err("layout not meaningful"
-                               " with this level\n");
+                       pr_err("layout not meaningful with this level\n");
                        rv = 1;
                        goto release;
                }
                if (info.new_layout == UnSet) {
-                       pr_err("layout %s not understood"
-                               " for this level\n",
+                       pr_err("layout %s not understood for this level\n",
                                s->layout_str);
                        rv = 1;
                        goto release;
@@ -2045,7 +2267,7 @@ size_change_error:
                        rv =1 ;
                }
                if (s->layout_str) {
-                       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
+                       if (md_get_array_info(fd, &array) != 0) {
                                dprintf("Cannot get array information.\n");
                                goto release;
                        }
@@ -2066,8 +2288,12 @@ size_change_error:
                 * number of devices (On-Line Capacity Expansion) must be
                 * performed at the level of the container
                 */
+               if (fd > 0) {
+                       close(fd);
+                       fd = -1;
+               }
                rv = reshape_container(container, devname, -1, st, &info,
-                                      c->force, c->backup_file, c->verbose, 0, 0);
+                                      c->force, c->backup_file, c->verbose, 0, 0, 0);
                frozen = 0;
        } else {
                /* get spare devices from external metadata
@@ -2130,27 +2356,22 @@ static int verify_reshape_position(struct mdinfo *info, int level)
                char *ep;
                unsigned long long position = strtoull(buf, &ep, 0);
 
-               dprintf(Name": Read sync_max sysfs entry is: %s\n", buf);
+               dprintf("Read sync_max sysfs entry is: %s\n", buf);
                if (!(ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))) {
                        position *= get_data_disks(level,
                                                   info->new_layout,
                                                   info->array.raid_disks);
                        if (info->reshape_progress < position) {
-                               dprintf("Corrected reshape progress (%llu) to "
-                                       "md position (%llu)\n",
+                               dprintf("Corrected reshape progress (%llu) to md position (%llu)\n",
                                        info->reshape_progress, position);
                                info->reshape_progress = position;
                                ret_val = 1;
                        } else if (info->reshape_progress > position) {
-                               pr_err("Fatal error: array "
-                                      "reshape was not properly frozen "
-                                      "(expected reshape position is %llu, "
-                                      "but reshape progress is %llu.\n",
+                               pr_err("Fatal error: array reshape was not properly frozen (expected reshape position is %llu, but reshape progress is %llu.\n",
                                       position, info->reshape_progress);
                                ret_val = -1;
                        } else {
-                               dprintf("Reshape position in md and metadata "
-                                       "are the same;");
+                               dprintf("Reshape position in md and metadata are the same;");
                                ret_val = 1;
                        }
                }
@@ -2252,7 +2473,10 @@ static int set_new_data_offset(struct mdinfo *sra, struct supertype *st,
                if (info2.space_before == 0 &&
                    info2.space_after == 0) {
                        /* Metadata doesn't support data_offset changes */
-                       return 1;
+                       if (!can_fallback)
+                               pr_err("%s: Metadata version doesn't support data_offset changes\n",
+                                      devname);
+                       goto fallback;
                }
                if (before > info2.space_before)
                        before = info2.space_before;
@@ -2497,8 +2721,7 @@ static int raid10_reshape(char *container, int fd, char *devname,
                       devname);
                if (err == EBUSY &&
                    (info->array.state & (1<<MD_SB_BITMAP_PRESENT)))
-                       cont_err("       Bitmap must be removed before"
-                                " shape can be changed\n");
+                       cont_err("       Bitmap must be removed before shape can be changed\n");
                goto release;
        }
        sysfs_free(sra);
@@ -2571,7 +2794,7 @@ static void update_cache_size(char *container, struct mdinfo *sra,
        /* make sure there is room for 'blocks' with a bit to spare */
        if (cache < 16 + blocks / disks)
                cache = 16 + blocks / disks;
-       cache /= (4096/512); /* Covert from sectors to pages */
+       cache /= (4096/512); /* Convert from sectors to pages */
 
        if (sra->cache_size < cache)
                subarray_set_num(container, sra, "stripe_cache_size",
@@ -2607,7 +2830,7 @@ static int impose_reshape(struct mdinfo *sra,
                                                 * reshape->after.data_disks);
        }
 
-       ioctl(fd, GET_ARRAY_INFO, &array);
+       md_get_array_info(fd, &array);
        if (info->array.chunk_size == info->new_chunk &&
            reshape->before.layout == reshape->after.layout &&
            st->ss->external == 0) {
@@ -2622,8 +2845,7 @@ static int impose_reshape(struct mdinfo *sra,
 
                        if (err == EBUSY &&
                            (array.state & (1<<MD_SB_BITMAP_PRESENT)))
-                               cont_err("Bitmap must be removed before"
-                                        " shape can be changed\n");
+                               cont_err("Bitmap must be removed before shape can be changed\n");
 
                        goto release;
                }
@@ -2647,8 +2869,7 @@ static int impose_reshape(struct mdinfo *sra,
 
                        if (err == EBUSY &&
                            (array.state & (1<<MD_SB_BITMAP_PRESENT)))
-                               cont_err("Bitmap must be removed before"
-                                        " shape can be changed\n");
+                               cont_err("Bitmap must be removed before shape can be changed\n");
                        goto release;
                }
        }
@@ -2664,7 +2885,7 @@ static int impose_level(int fd, int level, char *devname, int verbose)
        struct mdinfo info;
        sysfs_init(&info, fd, NULL);
 
-       ioctl(fd, GET_ARRAY_INFO, &array);
+       md_get_array_info(fd, &array);
        if (level == 0 &&
            (array.level >= 4 && array.level <= 6)) {
                /* To convert to RAID0 we need to fail and
@@ -2700,11 +2921,10 @@ static int impose_level(int fd, int level, char *devname, int verbose)
                              makedev(disk.major, disk.minor));
                }
                /* Now fail anything left */
-               ioctl(fd, GET_ARRAY_INFO, &array);
+               md_get_array_info(fd, &array);
                for (d = 0, found = 0;
                     d < MAX_DISKS && found < array.nr_disks;
                     d++) {
-                       int cnt;
                        mdu_disk_info_t disk;
                        disk.number = d;
                        if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
@@ -2718,13 +2938,7 @@ static int impose_level(int fd, int level, char *devname, int verbose)
                                continue;
                        ioctl(fd, SET_DISK_FAULTY,
                              makedev(disk.major, disk.minor));
-                       cnt = 5;
-                       while (ioctl(fd, HOT_REMOVE_DISK,
-                                    makedev(disk.major, disk.minor)) < 0
-                              && errno == EBUSY
-                              && cnt--) {
-                               usleep(10000);
-                       }
+                       hot_remove_disk(fd, makedev(disk.major, disk.minor), 1);
                }
        }
        c = map_num(pers, level);
@@ -2736,8 +2950,7 @@ static int impose_level(int fd, int level, char *devname, int verbose)
                                devname, c);
                        if (err == EBUSY &&
                            (array.state & (1<<MD_SB_BITMAP_PRESENT)))
-                               cont_err("Bitmap must be removed"
-                                        " before level can be changed\n");
+                               cont_err("Bitmap must be removed before level can be changed\n");
                        return err;
                }
                if (verbose >= 0)
@@ -2753,6 +2966,48 @@ static void catch_term(int sig)
        sigterm = 1;
 }
 
+/* Get systemd to run "mdadm --grow --continue" for devnm by starting
+ * mdadm-grow-continue@<devnm>.service rather than running in the
+ * background.  Returns 1 if systemctl exited with status 0, else 0.
+ */
+static int continue_via_systemd(char *devnm)
+{
+       int skipped, i, status;
+       pid_t pid, reaped;
+       char pathbuf[1024];
+
+       pid = fork();
+       switch (pid) {
+       case  0:
+               /* FIXME yuk. CLOSE_EXEC??  Stop after 20 failed closes in a row */
+               skipped = 0;
+               for (i = 3; skipped < 20; i++)
+                       if (close(i) < 0)
+                               skipped++;
+                       else
+                               skipped = 0;
+               /* Don't want to see error messages from systemctl.
+                * If the service doesn't exist, we fork ourselves.
+                */
+               close(2);
+               open("/dev/null", O_WRONLY);
+               snprintf(pathbuf, sizeof(pathbuf),
+                        "mdadm-grow-continue@%s.service", devnm);
+               execl("/usr/bin/systemctl", "systemctl", "start", pathbuf, NULL);
+               execl("/bin/systemctl", "systemctl", "start", pathbuf, NULL);
+               _exit(1); /* both execs failed; skip inherited stdio flush */
+       case -1: /* Just do it ourselves. */
+               break;
+       default: /* parent: reap our own child, not just any child */
+               do
+                       reaped = waitpid(pid, &status, 0);
+               while (reaped < 0 && errno == EINTR);
+               if (reaped == pid && status == 0)
+                       return 1;
+       }
+       return 0;
+}
+
 static int reshape_array(char *container, int fd, char *devname,
                         struct supertype *st, struct mdinfo *info,
                         int force, struct mddev_dev *devlist,
@@ -2782,11 +3037,12 @@ static int reshape_array(char *container, int fd, char *devname,
        unsigned long long array_size;
        int done;
        struct mdinfo *sra = NULL;
+       char buf[20];
 
        /* when reshaping a RAID0, the component_size might be zero.
         * So try to fix that up.
         */
-       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
+       if (md_get_array_info(fd, &array) != 0) {
                dprintf("Cannot get array information.\n");
                goto release;
        }
@@ -2824,12 +3080,13 @@ static int reshape_array(char *container, int fd, char *devname,
             reshape.before.layout != info->array.layout ||
             reshape.before.data_disks + reshape.parity
             != info->array.raid_disks - max(0, info->delta_disks))) {
-               pr_err("reshape info is not in native format -"
-                       " cannot continue.\n");
+               pr_err("reshape info is not in native format - cannot continue.\n");
                goto release;
        }
 
-       if (st->ss->external && restart && (info->reshape_progress == 0)) {
+       if (st->ss->external && restart && (info->reshape_progress == 0) &&
+           !((sysfs_get_str(info, NULL, "sync_action", buf, sizeof(buf)) > 0) &&
+             (strncmp(buf, "reshape", 7) == 0))) {
                /* When reshape is restarted from '0', very begin of array
                 * it is possible that for external metadata reshape and array
                 * configuration doesn't happen.
@@ -2880,8 +3137,7 @@ static int reshape_array(char *container, int fd, char *devname,
        if (!force &&
            info->new_level > 1 && info->array.level > 1 &&
            spares_needed > info->array.spare_disks + added_disks) {
-               pr_err("Need %d spare%s to avoid degraded array,"
-                      " and only have %d.\n"
+               pr_err("Need %d spare%s to avoid degraded array, and only have %d.\n"
                       "       Use --force to over-ride this check.\n",
                       spares_needed,
                       spares_needed == 1 ? "" : "s",
@@ -2894,8 +3150,7 @@ static int reshape_array(char *container, int fd, char *devname,
                - array.raid_disks;
        if ((info->new_level > 1 || info->new_level == 0) &&
            spares_needed > info->array.spare_disks +added_disks) {
-               pr_err("Need %d spare%s to create working array,"
-                      " and only have %d.\n",
+               pr_err("Need %d spare%s to create working array, and only have %d.\n",
                       spares_needed,
                       spares_needed == 1 ? "" : "s",
                       info->array.spare_disks + added_disks);
@@ -2962,9 +3217,11 @@ static int reshape_array(char *container, int fd, char *devname,
         * array.  Now that the array has been changed to the right
         * level and frozen, we can safely add them.
         */
-       if (devlist)
-               Manage_subdevs(devname, fd, devlist, verbose,
-                              0,NULL, 0);
+       if (devlist) {
+               if (Manage_subdevs(devname, fd, devlist, verbose,
+                                  0, NULL, 0))
+                       goto release;
+       }
 
        if (reshape.backup_blocks == 0 && data_offset != INVALID_SECTORS)
                reshape.backup_blocks = reshape.before.data_disks * info->array.chunk_size/512;
@@ -2973,7 +3230,7 @@ static int reshape_array(char *container, int fd, char *devname,
                 * some more changes: layout, raid_disks, chunk_size
                 */
                /* read current array info */
-               if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
+               if (md_get_array_info(fd, &array) != 0) {
                        dprintf("Cannot get array information.\n");
                        goto release;
                }
@@ -3102,8 +3359,19 @@ static int reshape_array(char *container, int fd, char *devname,
                                   devname, container, &reshape) < 0)
                        goto release;
                if (sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0) {
-                       pr_err("Failed to initiate reshape!\n");
-                       goto release;
+                       struct mdinfo *sd;
+                       if (errno != EINVAL) {
+                               pr_err("Failed to initiate reshape!\n");
+                               goto release;
+                       }
+                       /* revert data_offset and try the old way */
+                       for (sd = sra->devs; sd; sd = sd->next) {
+                               sysfs_set_num(sra, sd, "new_offset",
+                                             sd->data_offset);
+                               sysfs_set_str(sra, NULL, "reshape_direction",
+                                             "forwards");
+                       }
+                       break;
                }
                if (info->new_level == reshape.level)
                        return 0;
@@ -3146,12 +3414,10 @@ started:
                       blocks < 16*1024*2)
                        blocks *= 2;
        } else
-               pr_err("Need to backup %luK of critical "
-                       "section..\n", blocks/2);
+               pr_err("Need to backup %luK of critical section..\n", blocks/2);
 
        if (blocks >= sra->component_size/2) {
-               pr_err("%s: Something wrong"
-                       " - reshape aborted\n",
+               pr_err("%s: Something wrong - reshape aborted\n",
                        devname);
                goto release;
        }
@@ -3168,7 +3434,7 @@ started:
        d = reshape_prepare_fdlist(devname, sra, odisks,
                                   nrdisks, blocks, backup_file,
                                   fdlist, offsets);
-       if (d < 0) {
+       if (d < odisks) {
                goto release;
        }
        if ((st->ss->manage_reshape == NULL) ||
@@ -3180,10 +3446,8 @@ started:
                                       devname);
                                pr_err(" Please provide one with \"--backup=...\"\n");
                                goto release;
-                       } else if (sra->array.spare_disks == 0) {
-                               pr_err("%s: Cannot grow - "
-                                       "need a spare or backup-file to backup "
-                                       "critical section\n", devname);
+                       } else if (d == odisks) {
+                               pr_err("%s: Cannot grow - need a spare or backup-file to backup critical section\n", devname);
                                goto release;
                        }
                } else {
@@ -3226,60 +3490,18 @@ started:
                free(fdlist);
                free(offsets);
                sysfs_free(sra);
-               pr_err("Reshape has to be continued from"
-                       " location %llu when root filesystem has been mounted.\n",
+               pr_err("Reshape has to be continued from location %llu when root filesystem has been mounted.\n",
                        sra->reshape_progress);
                return 1;
        }
 
-       if (!forked && !check_env("MDADM_NO_SYSTEMCTL")) {
-               int skipped, i, pid, status;
-               char pathbuf[1024];
-               char *devnm;
-               /* In a systemd/udev world, it is best to get systemd to
-                * run "mdadm --grow --continue" rather than running in the
-                * background.
-                */
-               if (container)
-                       devnm = container;
-               else
-                       devnm = sra->sys_name;
-               switch(fork()) {
-               case  0:
-                       /* FIXME yuk. CLOSE_EXEC?? */
-                       skipped = 0;
-                       for (i = 3; skipped < 20; i++)
-                               if (close(i) < 0)
-                                       skipped++;
-                               else
-                                       skipped = 0;
-
-                       /* Don't want to see error messages from
-                        * systemctl.  If the service doesn't exist,
-                        * we fork ourselves.
-                        */
-                       close(2);
-                       open("/dev/null", O_WRONLY);
-                       snprintf(pathbuf, sizeof(pathbuf), "mdadm-grow-continue@%s.service",
-                                devnm);
-                       status = execl("/usr/bin/systemctl", "systemctl",
-                                      "start",
-                                      pathbuf, NULL);
-                       status = execl("/bin/systemctl", "systemctl", "start",
-                                      pathbuf, NULL);
-                       exit(1);
-               case -1: /* Just do it ourselves. */
-                       break;
-               default: /* parent - good */
-                       pid = wait(&status);
-                       if (pid >= 0 && status == 0) {
-                               free(fdlist);
-                               free(offsets);
-                               sysfs_free(sra);
-                               return 0;
-                       }
+       if (!forked && !check_env("MDADM_NO_SYSTEMCTL"))
+               if (continue_via_systemd(container ?: sra->sys_name)) {
+                       free(fdlist);
+                       free(offsets);
+                       sysfs_free(sra);
+                       return 0;
                }
-       }
 
        /* Now we just need to kick off the reshape and watch, while
         * handling backups of the data...
@@ -3365,7 +3587,7 @@ started:
                bul = make_backup(sra->sys_name);
                if (bul) {
                        char buf[1024];
-                       int l = readlink(bul, buf, sizeof(buf));
+                       int l = readlink(bul, buf, sizeof(buf) - 1);
                        if (l > 0) {
                                buf[l]=0;
                                unlink(buf);
@@ -3448,8 +3670,8 @@ int reshape_container(char *container, char *devname,
                      struct supertype *st,
                      struct mdinfo *info,
                      int force,
-                     char *backup_file,
-                     int verbose, int restart, int freeze_reshape)
+                     char *backup_file, int verbose,
+                     int forked, int restart, int freeze_reshape)
 {
        struct mdinfo *cc = NULL;
        int rv = restart;
@@ -3474,15 +3696,18 @@ int reshape_container(char *container, char *devname,
         */
        ping_monitor(container);
 
-       switch (fork()) {
+       if (!forked && !freeze_reshape && !check_env("MDADM_NO_SYSTEMCTL"))
+               if (continue_via_systemd(container))
+                       return 0;
+
+       switch (forked ? 0 : fork()) {
        case -1: /* error */
                perror("Cannot fork to complete reshape\n");
                unfreeze(st);
                return 1;
        default: /* parent */
                if (!freeze_reshape)
-                       printf(Name ": multi-array reshape continues"
-                              " in background\n");
+                       printf("%s: multi-array reshape continues in background\n", Name);
                return 0;
        case 0: /* child */
                map_fork();
@@ -3510,7 +3735,7 @@ int reshape_container(char *container, char *devname,
                int fd;
                struct mdstat_ent *mdstat;
                char *adev;
-               int devid;
+               dev_t devid;
 
                sysfs_free(cc);
 
@@ -3544,8 +3769,7 @@ int reshape_container(char *container, char *devname,
 
                fd = open_dev(mdstat->devnm);
                if (fd < 0) {
-                       printf(Name ": Device %s cannot be opened for reshape.",
-                              adev);
+                       pr_err("Device %s cannot be opened for reshape.\n", adev);
                        break;
                }
 
@@ -3560,8 +3784,7 @@ int reshape_container(char *container, char *devname,
                         * This is possibly interim until the behaviour of
                         * reshape_array is resolved().
                         */
-                       printf(Name ": Multiple reshape execution detected for "
-                              "device  %s.", adev);
+                       printf("%s: Multiple reshape execution detected for device  %s.\n", Name, adev);
                        close(fd);
                        break;
                }
@@ -3871,9 +4094,11 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
                 * So we need these extra tests.
                 */
                if (completed == 0 && advancing
+                   && strncmp(action, "idle", 4) == 0
                    && info->reshape_progress > 0)
                        break;
                if (completed == 0 && !advancing
+                   && strncmp(action, "idle", 4) == 0
                    && info->reshape_progress < (info->component_size
                                                 * reshape->after.data_disks))
                        break;
@@ -3882,21 +4107,31 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
                        goto check_progress;
        }
        /* Some kernels reset 'sync_completed' to zero,
-        * we need to have real point we are in md
+        * we need to have real point we are in md.
+        * So in that case, read 'reshape_position' from sysfs.
         */
-       if (completed == 0)
-               completed = max_progress;
-
-       /* some kernels can give an incorrectly high 'completed' number */
-       completed /= (info->new_chunk/512);
-       completed *= (info->new_chunk/512);
-       /* Convert 'completed' back in to a 'progress' number */
-       completed *= reshape->after.data_disks;
-       if (!advancing) {
-               completed = info->component_size * reshape->after.data_disks
-                       - completed;
+       if (completed == 0) {
+               unsigned long long reshapep;
+               char action[20];
+               if (sysfs_get_str(info, NULL, "sync_action",
+                                 action, 20) > 0 &&
+                   strncmp(action, "idle", 4) == 0 &&
+                   sysfs_get_ll(info, NULL,
+                                "reshape_position", &reshapep) == 0)
+                       *reshape_completed = reshapep;
+       } else {
+               /* some kernels can give an incorrectly high
+                * 'completed' number, so round down */
+               completed /= (info->new_chunk/512);
+               completed *= (info->new_chunk/512);
+               /* Convert 'completed' back in to a 'progress' number */
+               completed *= reshape->after.data_disks;
+               if (!advancing)
+                       completed = (info->component_size
+                                    * reshape->after.data_disks
+                                    - completed);
+               *reshape_completed = completed;
        }
-       *reshape_completed = completed;
 
        close(fd);
 
@@ -3916,7 +4151,6 @@ check_progress:
         * it was just a device failure that leaves us degraded but
         * functioning.
         */
-       strcpy(buf, "hi");
        if (sysfs_get_str(info, NULL, "reshape_position", buf, sizeof(buf)) < 0
            || strncmp(buf, "none", 4) != 0) {
                /* The abort might only be temporary.  Wait up to 10
@@ -3996,8 +4230,10 @@ static int grow_backup(struct mdinfo *sra,
                        if (sd->disk.state & (1<<MD_DISK_FAULTY))
                                continue;
                        if (sd->disk.state & (1<<MD_DISK_SYNC)) {
-                               char sbuf[20];
-                               if (sysfs_get_str(sra, sd, "state", sbuf, 20) < 0 ||
+                               char sbuf[100];
+
+                               if (sysfs_get_str(sra, sd, "state",
+                                                 sbuf, sizeof(sbuf)) < 0 ||
                                    strstr(sbuf, "faulty") ||
                                    strstr(sbuf, "in_sync") == NULL) {
                                        /* this device is dead */
@@ -4513,11 +4749,10 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                 * sometimes they aren't... So allow considerable flexability in matching, and allow
                 * this test to be overridden by an environment variable.
                 */
-               if (info->array.utime > (int)__le64_to_cpu(bsb.mtime) + 2*60*60 ||
-                   info->array.utime < (int)__le64_to_cpu(bsb.mtime) - 10*60) {
+               if(time_after(info->array.utime, (unsigned int)__le64_to_cpu(bsb.mtime) + 2*60*60) ||
+                  time_before(info->array.utime, (unsigned int)__le64_to_cpu(bsb.mtime) - 10*60)) {
                        if (check_env("MDADM_GROW_ALLOW_OLD")) {
-                               pr_err("accepting backup with timestamp %lu "
-                                       "for array with timestamp %lu\n",
+                               pr_err("accepting backup with timestamp %lu for array with timestamp %lu\n",
                                        (unsigned long)__le64_to_cpu(bsb.mtime),
                                        (unsigned long)info->array.utime);
                        } else {
@@ -4598,7 +4833,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                        st->ss->free_super(st);
                        offsets[j] = dinfo.data_offset * 512;
                }
-               printf(Name ": restoring critical section\n");
+               printf("%s: restoring critical section\n", Name);
 
                if (restore_stripes(fdlist, offsets,
                                    info->array.raid_disks,
@@ -4743,7 +4978,7 @@ int Grow_continue_command(char *devname, int fd,
        struct mdinfo *cc = NULL;
        struct mdstat_ent *mdstat = NULL;
        int cfd = -1;
-       int fd2 = -1;
+       int fd2;
 
        dprintf("Grow continue from command line called for %s\n",
                devname);
@@ -4757,10 +4992,11 @@ int Grow_continue_command(char *devname, int fd,
        dprintf("Grow continue is run for ");
        if (st->ss->external == 0) {
                int d;
-               dprintf("native array (%s)\n", devname);
-               if (ioctl(fd, GET_ARRAY_INFO, &array.array) < 0) {
-                       pr_err("%s is not an active md array -"
-                               " aborting\n", devname);
+               int cnt = 5;
+               dprintf_cont("native array (%s)\n", devname);
+               if (md_get_array_info(fd, &array.array) < 0) {
+                       pr_err("%s is not an active md array - aborting\n",
+                              devname);
                        ret_val = 1;
                        goto Grow_continue_command_exit;
                }
@@ -4769,55 +5005,58 @@ int Grow_continue_command(char *devname, int fd,
                 * FIXME we should really get what we need from
                 * sysfs
                 */
-               for (d = 0; d < MAX_DISKS; d++) {
-                       mdu_disk_info_t disk;
-                       char *dv;
-                       int err;
-                       disk.number = d;
-                       if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
-                               continue;
-                       if (disk.major == 0 && disk.minor == 0)
-                               continue;
-                       if ((disk.state & (1 << MD_DISK_ACTIVE)) == 0)
-                               continue;
-                       dv = map_dev(disk.major, disk.minor, 1);
-                       if (!dv)
-                               continue;
-                       fd2 = dev_open(dv, O_RDONLY);
-                       if (fd2 < 0)
-                               continue;
-                       err = st->ss->load_super(st, fd2, NULL);
-                       close(fd2);
-                       /* invalidate fd2 to avoid possible double close() */
-                       fd2 = -1;
-                       if (err)
-                               continue;
-                       break;
-               }
-               if (d == MAX_DISKS) {
-                       pr_err("Unable to load metadata for %s\n",
-                              devname);
-                       ret_val = 1;
-                       goto Grow_continue_command_exit;
-               }
-               st->ss->getinfo_super(st, content, NULL);
+               do {
+                       for (d = 0; d < MAX_DISKS; d++) {
+                               mdu_disk_info_t disk;
+                               char *dv;
+                               int err;
+                               disk.number = d;
+                               if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+                                       continue;
+                               if (disk.major == 0 && disk.minor == 0)
+                                       continue;
+                               if ((disk.state & (1 << MD_DISK_ACTIVE)) == 0)
+                                       continue;
+                               dv = map_dev(disk.major, disk.minor, 1);
+                               if (!dv)
+                                       continue;
+                               fd2 = dev_open(dv, O_RDONLY);
+                               if (fd2 < 0)
+                                       continue;
+                               err = st->ss->load_super(st, fd2, NULL);
+                               close(fd2);
+                               if (err)
+                                       continue;
+                               break;
+                       }
+                       if (d == MAX_DISKS) {
+                               pr_err("Unable to load metadata for %s\n",
+                                      devname);
+                               ret_val = 1;
+                               goto Grow_continue_command_exit;
+                       }
+                       st->ss->getinfo_super(st, content, NULL);
+                       if (!content->reshape_active)
+                               sleep(3);
+                       else
+                               break;
+               } while (cnt-- > 0);
        } else {
                char *container;
 
                if (subarray) {
-                       dprintf("subarray (%s)\n", subarray);
+                       dprintf_cont("subarray (%s)\n", subarray);
                        container = st->container_devnm;
                        cfd = open_dev_excl(st->container_devnm);
                } else {
                        container = st->devnm;
                        close(fd);
                        cfd = open_dev_excl(st->devnm);
-                       dprintf("container (%s)\n", container);
+                       dprintf_cont("container (%s)\n", container);
                        fd = cfd;
                }
                if (cfd < 0) {
-                       pr_err("Unable to open container "
-                               "for %s\n", devname);
+                       pr_err("Unable to open container for %s\n", devname);
                        ret_val = 1;
                        goto Grow_continue_command_exit;
                }
@@ -4853,9 +5092,7 @@ int Grow_continue_command(char *devname, int fd,
                                allow_reshape = 0;
 
                        if (!allow_reshape) {
-                               pr_err("cannot continue reshape of an array"
-                                      " in container with unsupported"
-                                      " metadata: %s(%s)\n",
+                               pr_err("cannot continue reshape of an array in container with unsupported metadata: %s(%s)\n",
                                       devname, container);
                                ret_val = 1;
                                goto Grow_continue_command_exit;
@@ -4875,8 +5112,7 @@ int Grow_continue_command(char *devname, int fd,
                        break;
                }
                if (!content) {
-                       pr_err("Unable to determine reshaped "
-                              "array for %s\n", devname);
+                       pr_err("Unable to determine reshaped array for %s\n", devname);
                        ret_val = 1;
                        goto Grow_continue_command_exit;
                }
@@ -4889,6 +5125,8 @@ int Grow_continue_command(char *devname, int fd,
 
                sysfs_init(content, fd2, mdstat->devnm);
 
+               close(fd2);
+
                /* start mdmon in case it is not running
                 */
                if (!mdmon_running(container))
@@ -4898,8 +5136,7 @@ int Grow_continue_command(char *devname, int fd,
                if (mdmon_running(container))
                        st->update_tail = &st->updates;
                else {
-                       pr_err("No mdmon found. "
-                               "Grow cannot continue.\n");
+                       pr_err("No mdmon found. Grow cannot continue.\n");
                        ret_val = 1;
                        goto Grow_continue_command_exit;
                }
@@ -4915,11 +5152,9 @@ int Grow_continue_command(char *devname, int fd,
 
        /* continue reshape
         */
-       ret_val = Grow_continue(fd, st, content, backup_file, 0);
+       ret_val = Grow_continue(fd, st, content, backup_file, 1, 0);
 
 Grow_continue_command_exit:
-       if (fd2 > -1)
-               close(fd2);
        if (cfd > -1)
                close(cfd);
        st->ss->free_super(st);
@@ -4931,7 +5166,7 @@ Grow_continue_command_exit:
 }
 
 int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
-                 char *backup_file, int freeze_reshape)
+                 char *backup_file, int forked, int freeze_reshape)
 {
        int ret_val = 2;
 
@@ -4948,13 +5183,13 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
                close(cfd);
                ret_val = reshape_container(st->container_devnm, NULL, mdfd,
                                            st, info, 0, backup_file,
-                                           0,
+                                           0, forked,
                                            1 | info->reshape_active,
                                            freeze_reshape);
        } else
                ret_val = reshape_array(NULL, mdfd, "array", st, info, 1,
                                        NULL, INVALID_SECTORS,
-                                       backup_file, 0, 1,
+                                       backup_file, 0, forked,
                                        1 | info->reshape_active,
                                        freeze_reshape);