]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Grow.c
FIX: Respect metadata size limitations
[thirdparty/mdadm.git] / Grow.c
diff --git a/Grow.c b/Grow.c
index e7fd7c42245759fad4663f52384850053c5efb95..86d10203486a5e3d7f670b70f380f537ce4dc7f3 100644 (file)
--- a/Grow.c
+++ b/Grow.c
@@ -375,12 +375,18 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
                return 1;
        }
        if (strcmp(file, "internal") == 0) {
+               int rv;
                int d;
+               int offset_setable = 0;
+               struct mdinfo *mdi;
                if (st->ss->add_internal_bitmap == NULL) {
                        fprintf(stderr, Name ": Internal bitmaps not supported "
                                "with %s metadata\n", st->ss->name);
                        return 1;
                }
+               mdi = sysfs_read(fd, -1, GET_BITMAP_LOCATION);
+               if (mdi)
+                       offset_setable = 1;
                for (d=0; d< st->max_devs; d++) {
                        mdu_disk_info_t disk;
                        char *dv;
@@ -401,11 +407,13 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
                                        if (st->ss->add_internal_bitmap(
                                                    st,
                                                    &chunk, delay, write_behind,
-                                                   bitmapsize, 0, major)
+                                                   bitmapsize, offset_setable,
+                                                   major)
                                                )
                                                st->ss->write_bitmap(st, fd2);
                                        else {
-                                               fprintf(stderr, Name ": failed to create internal bitmap - chunksize problem.\n");
+                                               fprintf(stderr, Name ": failed "
+                               "to create internal bitmap - chunksize problem.\n");
                                                close(fd2);
                                                return 1;
                                        }
@@ -413,8 +421,16 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
                                close(fd2);
                        }
                }
-               array.state |= (1<<MD_SB_BITMAP_PRESENT);
-               if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
+               if (offset_setable) {
+                       st->ss->getinfo_super(st, mdi, NULL);
+                       sysfs_init(mdi, fd, -1);
+                       rv = sysfs_set_num(mdi, NULL, "bitmap/location",
+                                          mdi->bitmap_offset);
+               } else {
+                       array.state |= (1<<MD_SB_BITMAP_PRESENT);
+                       rv = ioctl(fd, SET_ARRAY_INFO, &array);
+               }
+               if (rv < 0) {
                        if (errno == EBUSY)
                                fprintf(stderr, Name
                                        ": Cannot add bitmap while array is"
@@ -480,7 +496,6 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
        return 0;
 }
 
-
 /*
  * When reshaping an array we might need to backup some data.
  * This is written to all spares with a 'super_block' describing it.
@@ -526,7 +541,7 @@ static int check_idle(struct supertype *st)
        char container[40];
        struct mdstat_ent *ent, *e;
        int is_idle = 1;
-       
+
        fmt_devname(container, container_dev);
        ent = mdstat_read(0, 0);
        for (e = ent ; e; e = e->next) {
@@ -549,7 +564,7 @@ static int freeze_container(struct supertype *st)
 
        if (!check_idle(st))
                return -1;
-       
+
        fmt_devname(container, container_dev);
 
        if (block_monitor(container, 1)) {
@@ -565,7 +580,7 @@ static void unfreeze_container(struct supertype *st)
        int container_dev = (st->container_dev != NoMdDev
                             ? st->container_dev : st->devnum);
        char container[40];
-       
+
        fmt_devname(container, container_dev);
 
        unblock_monitor(container, 1);
@@ -635,7 +650,7 @@ static void wait_reshape(struct mdinfo *sra)
 static int reshape_super(struct supertype *st, long long size, int level,
                         int layout, int chunksize, int raid_disks,
                         int delta_disks, char *backup_file, char *dev,
-                        int verbose)
+                        int direction, int verbose)
 {
        /* nothing extra to check in the native case */
        if (!st->ss->external)
@@ -649,7 +664,7 @@ static int reshape_super(struct supertype *st, long long size, int level,
 
        return st->ss->reshape_super(st, size, level, layout, chunksize,
                                     raid_disks, delta_disks, backup_file, dev,
-                                    verbose);
+                                    direction, verbose);
 }
 
 static void sync_metadata(struct supertype *st)
@@ -698,7 +713,8 @@ static int subarray_set_num(char *container, struct mdinfo *sra, char *name, int
        return rc;
 }
 
-int start_reshape(struct mdinfo *sra, int already_running, int data_disks)
+int start_reshape(struct mdinfo *sra, int already_running,
+                 int before_data_disks, int data_disks)
 {
        int err;
        unsigned long long sync_max_to_set;
@@ -707,7 +723,11 @@ int start_reshape(struct mdinfo *sra, int already_running, int data_disks)
        err = sysfs_set_num(sra, NULL, "suspend_hi", sra->reshape_progress);
        err = err ?: sysfs_set_num(sra, NULL, "suspend_lo",
                                   sra->reshape_progress);
-       sync_max_to_set = sra->reshape_progress / data_disks;
+       if (before_data_disks <= data_disks)
+               sync_max_to_set = sra->reshape_progress / data_disks;
+       else
+               sync_max_to_set = (sra->component_size * data_disks
+                                  - sra->reshape_progress) / data_disks;
        if (!already_running)
                sysfs_set_num(sra, NULL, "sync_min", sync_max_to_set);
        err = err ?: sysfs_set_num(sra, NULL, "sync_max", sync_max_to_set);
@@ -1003,6 +1023,10 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                 * raid5 with 2 disks, or
                 * raid0 with 1 disk
                 */
+               if (info->new_level > 1 &&
+                   (info->component_size & 7))
+                       return "Cannot convert RAID1 of this size - "
+                               "reduce size to multiple of 4K first.";
                if (info->new_level == 0) {
                        if (info->delta_disks != UnSet &&
                            info->delta_disks != 0)
@@ -1255,7 +1279,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                break;
 
        case 5:
-               /* We get to RAID5 for RAID5 or RAID6 */
+               /* We get to RAID5 from RAID5 or RAID6 */
                if (re->level != 5 && re->level != 6)
                        return "Cannot convert to RAID5 from this level";
 
@@ -1277,11 +1301,27 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                                char layout[40];
                                char *ls = map_num(r5layout, info->new_layout);
                                int l;
-                               strcat(strcpy(layout, ls), "-6");
-                               l = map_name(r6layout, layout);
-                               if (l == UnSet)
-                                       return "Cannot find RAID6 layout"
-                                               " to convert to";
+                               if (ls) {
+                                       /* Current RAID6 layout has a RAID5
+                                        * equivalent - good
+                                        */
+                                       strcat(strcpy(layout, ls), "-6");
+                                       l = map_name(r6layout, layout);
+                                       if (l == UnSet)
+                                               return "Cannot find RAID6 layout"
+                                                       " to convert to";
+                               } else {
+                                       /* Current RAID6 has no equivalent.
+                                        * If it is already a '-6' layout we
+                                        * can leave it unchanged, else we must
+                                        * fail
+                                        */
+                                       ls = map_num(r6layout, info->new_layout);
+                                       if (!ls ||
+                                           strcmp(ls+strlen(ls)-2, "-6") != 0)
+                                               return "Please specify new layout";
+                                       l = info->new_layout;
+                               }
                                re->after.layout = l;
                        }
                }
@@ -1346,6 +1386,44 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
        return NULL;
 }
 
+/* set_array_size()
+ *     Raise the "array_size" sysfs attribute of an externally managed
+ *     array to the size found in the metadata (custom_array_size / 2).
+ *     The attribute is only written when the metadata size is larger
+ *     than the value currently read back from sysfs.
+ * Parameters:
+ *     st           : supertype whose container_content() supplies the
+ *                    metadata view of the subarray
+ *     sra          : mdinfo handed to the sysfs helpers
+ *     text_version : version string holding the subarray name after its
+ *                    second '/' (presumably "/<container>/<subarray>");
+ *                    sra->text_version is used when this is NULL
+ * Return value:
+ *      0 : array_size was increased
+ *     -1 : NULL st/sra, no subarray part in text_version, metadata not
+ *          available, nothing to increase, or the sysfs write failed
+ */
+static int set_array_size(struct supertype *st, struct mdinfo *sra,
+                         char *text_version)
+{
+       struct mdinfo *info;
+       char *subarray;
+       int ret_val = -1;
+
+       if ((st == NULL) || (sra == NULL))
+               return ret_val;
+
+       if (text_version == NULL)
+               text_version = sra->text_version;
+       /* Skip the leading character and look for the separator in front
+        * of the subarray name.  An empty string or a missing '/' must not
+        * be turned into a bogus pointer for container_content().
+        */
+       subarray = text_version[0] ? strchr(text_version+1, '/') : NULL;
+       if (subarray == NULL) {
+               dprintf("Error: set_array_size(): no subarray in \"%s\"\n",
+                       text_version);
+               return ret_val;
+       }
+       subarray++;
+       info = st->ss->container_content(st, subarray);
+       if (info) {
+               unsigned long long current_size = 0;
+               unsigned long long new_size =
+                       info->custom_array_size/2;
+
+               if (sysfs_get_ll(sra, NULL, "array_size", &current_size) == 0 &&
+                   new_size > current_size) {
+                       if (sysfs_set_num(sra, NULL, "array_size", new_size)
+                                       < 0)
+                               dprintf("Error: Cannot set array size");
+                       else {
+                               ret_val = 0;
+                               dprintf("Array size changed");
+                       }
+                       /* completes whichever message was started above */
+                       dprintf(" from %llu to %llu.\n",
+                               current_size, new_size);
+               }
+               sysfs_free(info);
+       } else
+               dprintf("Error: set_array_size(): info pointer in NULL\n");
+
+       return ret_val;
+}
+
 static int reshape_array(char *container, int fd, char *devname,
                         struct supertype *st, struct mdinfo *info,
                         int force, struct mddev_dev *devlist,
@@ -1353,7 +1431,7 @@ static int reshape_array(char *container, int fd, char *devname,
                         int restart, int freeze_reshape);
 static int reshape_container(char *container, char *devname,
                             int mdfd,
-                            struct supertype *st, 
+                            struct supertype *st,
                             struct mdinfo *info,
                             int force,
                             char *backup_file,
@@ -1513,15 +1591,15 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        Name ": Need %d spare%s to avoid degraded array,"
                        " and only have %d.\n"
                        "       Use --force to over-ride this check.\n",
-                       raid_disks - array.raid_disks, 
-                       raid_disks - array.raid_disks == 1 ? "" : "s", 
+                       raid_disks - array.raid_disks,
+                       raid_disks - array.raid_disks == 1 ? "" : "s",
                        array.spare_disks + added_disks);
                return 1;
        }
 
        sra = sysfs_read(fd, 0, GET_LEVEL | GET_DISKS | GET_DEVS
                         | GET_STATE | GET_VERSION);
-       if (sra) {
+       if (sra) {
                if (st->ss->external && subarray == NULL) {
                        array.level = LEVEL_CONTAINER;
                        sra->array.level = LEVEL_CONTAINER;
@@ -1548,16 +1626,38 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                long long orig_size = get_component_size(fd)/2;
                long long min_csize;
                struct mdinfo *mdi;
+               int raid0_takeover = 0;
 
                if (orig_size == 0)
                        orig_size = array.size;
 
                if (reshape_super(st, size, UnSet, UnSet, 0, 0, UnSet, NULL,
-                                 devname, !quiet)) {
+                                 devname, APPLY_METADATA_CHANGES, !quiet)) {
                        rv = 1;
                        goto release;
                }
                sync_metadata(st);
+               if (st->ss->external) {
+                       /* metadata can have size limitation
+                        * update size value according to metadata information
+                        */
+                       struct mdinfo *sizeinfo =
+                               st->ss->container_content(st, subarray);
+                       if (sizeinfo) {
+                               unsigned long long new_size =
+                                       sizeinfo->custom_array_size/2;
+                               int data_disks = get_data_disks(
+                                               sizeinfo->array.level,
+                                               sizeinfo->array.layout,
+                                               sizeinfo->array.raid_disks);
+                               new_size /= data_disks;
+                               dprintf("Metadata size correction from %llu to "
+                                       "%llu (%llu)\n", orig_size, new_size,
+                                       new_size * data_disks);
+                               size = new_size;
+                               sysfs_free(sizeinfo);
+                       }
+               }
 
                /* Update the size of each member device in case
                 * they have been resized.  This will never reduce
@@ -1596,6 +1696,27 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                                "2TB per device\n");
                        size = min_csize;
                }
+               if (st->ss->external) {
+                       if (sra->array.level == 0) {
+                               rv = sysfs_set_str(sra, NULL, "level",
+                                                  "raid5");
+                               if (!rv) {
+                                       raid0_takeover = 1;
+                                       /* get array parameters after takeover
+                                        * to change one parameter at a time
+                                        */
+                                       rv = ioctl(fd, GET_ARRAY_INFO, &array);
+                               }
+                       }
+                       /* make sure mdmon is
+                        * aware of the new level */
+                       if (!mdmon_running(st->container_dev))
+                               start_mdmon(st->container_dev);
+                       ping_monitor(container);
+                       if (mdmon_running(st->container_dev) &&
+                                       st->update_tail == NULL)
+                               st->update_tail = &st->updates;
+               }
 
                array.size = size;
                if (array.size != size) {
@@ -1607,18 +1728,38 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                                                   "component_size", size);
                        else
                                rv = -1;
-               } else
+               } else {
                        rv = ioctl(fd, SET_ARRAY_INFO, &array);
+
+                       /* manage array size when it is managed externally
+                        */
+                       if ((rv == 0) && st->ss->external)
+                               rv = set_array_size(st, sra, sra->text_version);
+               }
+
+               if (raid0_takeover) {
+                       /* do not resync non-existing parity,
+                        * we will drop it anyway
+                        */
+                       sysfs_set_str(sra, NULL, "sync_action", "idle");
+                       /* go back to raid0, drop parity disk
+                        */
+                       sysfs_set_str(sra, NULL, "level", "raid0");
+                       ioctl(fd, GET_ARRAY_INFO, &array);
+               }
+
                if (rv != 0) {
                        int err = errno;
 
                        /* restore metadata */
                        if (reshape_super(st, orig_size, UnSet, UnSet, 0, 0,
-                                         UnSet, NULL, devname, !quiet) == 0)
+                                         UnSet, NULL, devname,
+                                         ROLLBACK_METADATA_CHANGES,
+                                         !quiet) == 0)
                                sync_metadata(st);
                        fprintf(stderr, Name ": Cannot set device size for %s: %s\n",
                                devname, strerror(err));
-                       if (err == EBUSY && 
+                       if (err == EBUSY &&
                            (array.state & (1<<MD_SB_BITMAP_PRESENT)))
                                fprintf(stderr, "       Bitmap must be removed before size can be changed\n");
                        rv = 1;
@@ -1668,11 +1809,11 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
        /* ========= check for Raid10/Raid1 -> Raid0 conversion ===============
         * current implementation assumes that following conditions must be met:
         * - RAID10:
-        *      - far_copies == 1
-        *      - near_copies == 2
+        *      - far_copies == 1
+        *      - near_copies == 2
         */
        if ((level == 0 && array.level == 10 && sra &&
-           array.layout == ((1 << 8) + 2) && !(array.raid_disks & 1)) ||
+            array.layout == ((1 << 8) + 2) && !(array.raid_disks & 1)) ||
            (level == 0 && array.level == 1 && sra)) {
                int err;
                err = remove_disks_for_takeover(st, sra, array.layout);
@@ -1722,7 +1863,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        goto release;
                }
        } else if (strcmp(layout_str, "normalise") == 0 ||
-                strcmp(layout_str, "normalize") == 0) {
+                  strcmp(layout_str, "normalize") == 0) {
                /* If we have a -6 RAID6 layout, remove the '-6'. */
                info.new_layout = UnSet;
                if (info.array.level == 6 && info.new_level == UnSet) {
@@ -1826,7 +1967,8 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                if (reshape_super(st, info.component_size, info.new_level,
                                  info.new_layout, info.new_chunk,
                                  info.array.raid_disks, info.delta_disks,
-                                 backup_file, devname, quiet)) {
+                                 backup_file, devname, APPLY_METADATA_CHANGES,
+                                 quiet)) {
                        rv = 1;
                        goto release;
                }
@@ -1842,6 +1984,63 @@ release:
        return rv;
 }
 
+/* verify_reshape_position()
+ *     Function checks if reshape position in metadata is not farther
+ *     than position in md.  The md position is read from the "sync_max"
+ *     sysfs entry and multiplied by the number of data disks before it
+ *     is compared with info->reshape_progress.
+ * Parameters:
+ *     info  : array information; info->reshape_progress is raised to the
+ *             md position when the metadata lags behind
+ *     level : RAID level handed to get_data_disks()
+ * Return value:
+ *      0 : not valid sysfs entry (read failed or content is not a number)
+ *             it can be caused by not started reshape, it should be started
+ *             by reshape array or raid0 array is before takeover
+ *     -1 :    error, reshape position is obviously wrong
+ *             (also returned when the entry reads back with 0 length)
+ *      1 :    success, reshape progress correct or updated
+ */
+static int verify_reshape_position(struct mdinfo *info, int level)
+{
+       int ret_val = 0;
+       char buf[40];
+       int rv;
+
+       /* read sync_max, failure can mean raid0 array */
+       rv = sysfs_get_str(info, NULL, "sync_max", buf, sizeof(buf));
+
+       if (rv > 0) {
+               char *ep;
+               unsigned long long position = strtoull(buf, &ep, 0);
+
+               dprintf(Name": Read sync_max sysfs entry is: %s\n", buf);
+               /* accept the value only if at least one digit was parsed
+                * and it is followed by end of string, newline or space
+                */
+               if (!(ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))) {
+                       position *= get_data_disks(level,
+                                                  info->new_layout,
+                                                  info->array.raid_disks);
+                       if (info->reshape_progress < position) {
+                               dprintf("Corrected reshape progress (%llu) to "
+                                       "md position (%llu)\n",
+                                       info->reshape_progress, position);
+                               info->reshape_progress = position;
+                               ret_val = 1;
+                       } else if (info->reshape_progress > position) {
+                               fprintf(stderr, Name ": Fatal error: array "
+                                       "reshape was not properly frozen "
+                                       "(expected reshape position is %llu, "
+                                       "but reshape progress is %llu.\n",
+                                       position, info->reshape_progress);
+                               ret_val = -1;
+                       } else {
+                               dprintf("Reshape position in md and metadata "
+                                       "are the same;\n");
+                               ret_val = 1;
+                       }
+               }
+       } else if (rv == 0) {
+               /* for valid sysfs entry, 0-length content
+                * should be indicated as error
+                */
+               ret_val = -1;
+       }
+
+       return ret_val;
+}
+
 static int reshape_array(char *container, int fd, char *devname,
                         struct supertype *st, struct mdinfo *info,
                         int force, struct mddev_dev *devlist,
@@ -1886,10 +2085,12 @@ static int reshape_array(char *container, int fd, char *devname,
        if (info->reshape_active) {
                int new_level = info->new_level;
                info->new_level = UnSet;
-               info->array.raid_disks -= info->delta_disks;
+               if (info->delta_disks > 0)
+                       info->array.raid_disks -= info->delta_disks;
                msg = analyse_change(info, &reshape);
                info->new_level = new_level;
-               info->array.raid_disks += info->delta_disks;
+               if (info->delta_disks > 0)
+                       info->array.raid_disks += info->delta_disks;
                if (!restart)
                        /* Make sure the array isn't read-only */
                        ioctl(fd, RESTART_ARRAY_RW, 0);
@@ -1903,12 +2104,24 @@ static int reshape_array(char *container, int fd, char *devname,
            (reshape.level != info->array.level ||
             reshape.before.layout != info->array.layout ||
             reshape.before.data_disks + reshape.parity
-            != info->array.raid_disks - info->delta_disks)) {
+            != info->array.raid_disks - max(0, info->delta_disks))) {
                fprintf(stderr, Name ": reshape info is not in native format -"
                        " cannot continue.\n");
                goto release;
        }
 
+       if (st->ss->external && restart && (info->reshape_progress == 0)) {
+               /* When reshape is restarted from '0', the very beginning of
+                * the array, it is possible that for external metadata the
+                * reshape and array configuration have not happened yet.
+                * Check if md has the same opinion, and reshape is restarted
+                * from 0. If so, this is a regular reshape start after the
+                * reshape switched in metadata to the next array only.
+                */
+               if ((verify_reshape_position(info, reshape.level) >= 0) &&
+                   (info->reshape_progress == 0))
+                       restart = 0;
+       }
        if (restart) {
                /* reshape already started. just skip to monitoring the reshape */
                if (reshape.backup_blocks == 0)
@@ -1937,7 +2150,7 @@ static int reshape_array(char *container, int fd, char *devname,
                        " and only have %d.\n"
                        "       Use --force to over-ride this check.\n",
                        spares_needed,
-                       spares_needed == 1 ? "" : "s", 
+                       spares_needed == 1 ? "" : "s",
                        info->array.spare_disks + added_disks);
                goto release;
        }
@@ -1951,7 +2164,7 @@ static int reshape_array(char *container, int fd, char *devname,
                        Name ": Need %d spare%s to create working array,"
                        " and only have %d.\n",
                        spares_needed,
-                       spares_needed == 1 ? "" : "s", 
+                       spares_needed == 1 ? "" : "s",
                        info->array.spare_disks + added_disks);
                goto release;
        }
@@ -1967,7 +2180,7 @@ static int reshape_array(char *container, int fd, char *devname,
                        err = errno;
                        fprintf(stderr, Name ": %s: could not set level to %s\n",
                                devname, c);
-                       if (err == EBUSY && 
+                       if (err == EBUSY &&
                            (info->array.state & (1<<MD_SB_BITMAP_PRESENT)))
                                fprintf(stderr, "       Bitmap must be removed"
                                        " before level can be changed\n");
@@ -1975,12 +2188,15 @@ static int reshape_array(char *container, int fd, char *devname,
                }
                if (!quiet)
                        fprintf(stderr, Name ": level of %s changed to %s\n",
-                               devname, c);    
+                               devname, c);
                orig_level = array.level;
                sysfs_freeze_array(info);
 
                if (reshape.level > 0 && st->ss->external) {
                        /* make sure mdmon is aware of the new level */
+                       if (mdmon_running(st->container_dev))
+                               flush_mdmon(container);
+
                        if (!mdmon_running(st->container_dev))
                                start_mdmon(st->container_dev);
                        ping_monitor(container);
@@ -2083,7 +2299,7 @@ static int reshape_array(char *container, int fd, char *devname,
         * 1/ The array will shrink.
         *    We need to ensure the reshape will pause before reaching
         *    the 'critical section'.  We also need to fork and wait for
-        *    that to happen.  When it does we 
+        *    that to happen.  When it does we
         *       suspend/backup/complete/unfreeze
         *
         * 2/ The array will not change size.
@@ -2136,7 +2352,7 @@ started:
         * unit.  The number we have so far is just a minimum
         */
        blocks = reshape.backup_blocks;
-       if (reshape.before.data_disks == 
+       if (reshape.before.data_disks ==
            reshape.after.data_disks) {
                /* Make 'blocks' bigger for better throughput, but
                 * not so big that we reject it below.
@@ -2226,9 +2442,16 @@ started:
 
        sra->new_chunk = info->new_chunk;
 
-       if (restart)
+       if (restart) {
+               /* For external metadata the checkpoint saved by mdmon can be
+                * lost or missed (e.g. due to a crash). Check whether md has
+                * progressed farther than the metadata points to.
+                * If so, this means metadata information is obsolete.
+                */
+               if (st->ss->external)
+                       verify_reshape_position(info, reshape.level);
                sra->reshape_progress = info->reshape_progress;
-       else {
+       } else {
                sra->reshape_progress = 0;
                if (reshape.after.data_disks < reshape.before.data_disks)
                        /* start from the end of the new array */
@@ -2250,7 +2473,7 @@ started:
                                Name ": Cannot set device shape for %s: %s\n",
                                devname, strerror(errno));
 
-                       if (err == EBUSY && 
+                       if (err == EBUSY &&
                            (array.state & (1<<MD_SB_BITMAP_PRESENT)))
                                fprintf(stderr,
                                        "       Bitmap must be removed before"
@@ -2265,18 +2488,18 @@ started:
                int err = 0;
                if (sysfs_set_num(sra, NULL, "chunk_size", info->new_chunk) < 0)
                        err = errno;
-               if (!err && sysfs_set_num(sra, NULL, "layout", 
-                                        reshape.after.layout) < 0)
+               if (!err && sysfs_set_num(sra, NULL, "layout",
+                                         reshape.after.layout) < 0)
                        err = errno;
                if (!err && subarray_set_num(container, sra, "raid_disks",
-                                           reshape.after.data_disks +
-                                           reshape.parity) < 0)
+                                            reshape.after.data_disks +
+                                            reshape.parity) < 0)
                        err = errno;
                if (err) {
                        fprintf(stderr, Name ": Cannot set device shape for %s\n",
                                devname);
 
-                       if (err == EBUSY && 
+                       if (err == EBUSY &&
                            (array.state & (1<<MD_SB_BITMAP_PRESENT)))
                                fprintf(stderr,
                                        "       Bitmap must be removed before"
@@ -2285,10 +2508,10 @@ started:
                }
        }
 
-       err = start_reshape(sra, restart,
-                           info->array.raid_disks - reshape.parity);
+       err = start_reshape(sra, restart, reshape.before.data_disks,
+                           reshape.after.data_disks);
        if (err) {
-               fprintf(stderr, 
+               fprintf(stderr,
                        Name ": Cannot %s reshape for %s\n",
                        restart ? "continue" : "start",
                        devname);
@@ -2301,7 +2524,7 @@ started:
                free(offsets);
                sysfs_free(sra);
                fprintf(stderr, Name ": Reshape has to be continued from"
-                       " location %llu when root fileststem has been mounted\n",
+                       " location %llu when root filesystem has been mounted.\n",
                        sra->reshape_progress);
                return 1;
        }
@@ -2374,7 +2597,7 @@ started:
                /* Re-load the metadata as much could have changed */
                int cfd = open_dev(st->container_dev);
                if (cfd >= 0) {
-                       ping_monitor(container);
+                       flush_mdmon(container);
                        st->ss->free_super(st);
                        st->ss->load_container(st, cfd, container);
                        close(cfd);
@@ -2386,35 +2609,8 @@ started:
         */
        if (reshape.before.data_disks !=
            reshape.after.data_disks &&
-           info->custom_array_size) {
-               struct mdinfo *info2;
-               char *subarray = strchr(info->text_version+1, '/')+1;
-
-               info2 = st->ss->container_content(st, subarray);
-               if (info2) {
-                       unsigned long long current_size = 0;
-                       unsigned long long new_size =
-                               info2->custom_array_size/2;
-
-                       if (sysfs_get_ll(sra,
-                                        NULL,
-                                        "array_size",
-                                        &current_size) == 0 &&
-                           new_size > current_size) {
-                               if (sysfs_set_num(sra, NULL,
-                                                 "array_size", new_size)
-                                   < 0)
-                                       dprintf("Error: Cannot"
-                                               " set array size");
-                               else
-                                       dprintf("Array size "
-                                               "changed");
-                               dprintf(" from %llu to %llu.\n",
-                                       current_size, new_size);
-                       }
-                       sysfs_free(info2);
-               }
-       }
+           info->custom_array_size)
+               set_array_size(st, info, info->text_version);
 
        if (info->new_level != reshape.level) {
 
@@ -2454,7 +2650,7 @@ release:
  */
 int reshape_container(char *container, char *devname,
                      int mdfd,
-                     struct supertype *st, 
+                     struct supertype *st,
                      struct mdinfo *info,
                      int force,
                      char *backup_file,
@@ -2462,6 +2658,7 @@ int reshape_container(char *container, char *devname,
 {
        struct mdinfo *cc = NULL;
        int rv = restart;
+       int last_devnum = -1;
 
        /* component_size is not meaningful for a container,
         * so pass '-1' meaning 'no change'
@@ -2470,7 +2667,8 @@ int reshape_container(char *container, char *devname,
            reshape_super(st, -1, info->new_level,
                          info->new_layout, info->new_chunk,
                          info->array.raid_disks, info->delta_disks,
-                         backup_file, devname, quiet)) {
+                         backup_file, devname, APPLY_METADATA_CHANGES,
+                         quiet)) {
                unfreeze(st);
                return 1;
        }
@@ -2532,22 +2730,54 @@ int reshape_container(char *container, char *devname,
                                                  devname2devnum(container));
                        if (!mdstat)
                                continue;
+                       if (mdstat->active == 0) {
+                               fprintf(stderr, Name ": Skipping inactive "
+                                       "array md%i.\n", mdstat->devnum);
+                               free_mdstat(mdstat);
+                               mdstat = NULL;
+                               continue;
+                       }
                        break;
                }
                if (!content)
                        break;
 
-               fd = open_dev(mdstat->devnum);
-               if (fd < 0)
-                       break;
                adev = map_dev(dev2major(mdstat->devnum),
                               dev2minor(mdstat->devnum),
                               0);
                if (!adev)
                        adev = content->text_version;
 
+               fd = open_dev(mdstat->devnum);
+               if (fd < 0) {
+                       printf(Name ": Device %s cannot be opened for reshape.",
+                              adev);
+                       break;
+               }
+
+               if (last_devnum == mdstat->devnum) {
+                       /* Do not allow for multiple reshape_array() calls for
+                        * the same array.
+                        * It can happen when reshape_array() returns without
+                        * error, when reshape is not finished (wrong reshape
+                        * starting/continuation conditions).  Mdmon doesn't
+                        * switch to next array in container and reentry
+                        * conditions for the same array occur.
+                        * This is possibly interim until the behaviour of
+                        * reshape_array() is resolved.
+                        */
+                       printf(Name ": Multiple reshape execution detected for "
+                              "device  %s.", adev);
+                       close(fd);
+                       break;
+               }
+               last_devnum = mdstat->devnum;
+
                sysfs_init(content, fd, mdstat->devnum);
 
+               if (mdmon_running(devname2devnum(container)))
+                       flush_mdmon(container);
+
                rv = reshape_array(container, fd, adev, st,
                                   content, force, NULL,
                                   backup_file, quiet, 1, restart,
@@ -2562,6 +2792,9 @@ int reshape_container(char *container, char *devname,
                restart = 0;
                if (rv)
                        break;
+
+               if (mdmon_running(devname2devnum(container)))
+                       flush_mdmon(container);
        }
        if (!rv)
                unfreeze(st);
@@ -2590,7 +2823,7 @@ int reshape_container(char *container, char *devname,
  * suspend/backup/allow always come together
  * wait/resume/discard do too.
  * For the same-size case we have two backups to improve flow.
- * 
+ *
  */
 
 int progress_reshape(struct mdinfo *info, struct reshape *reshape,
@@ -2735,7 +2968,7 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
         * this much.
         */
        target = 64*1024*2 * min(reshape->before.data_disks,
-                                 reshape->after.data_disks);
+                                reshape->after.data_disks);
        target /= reshape->backup_blocks;
        if (target < 2)
                target = 2;
@@ -2872,7 +3105,7 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
                        - completed;
        }
        *reshape_completed = completed;
-       
+
        close(fd);
 
        /* We return the need_backup flag.  Caller will decide
@@ -2932,7 +3165,6 @@ check_progress:
        }
 }
 
-
 /* FIXME return status is never checked */
 static int grow_backup(struct mdinfo *sra,
                unsigned long long offset, /* per device */
@@ -3002,7 +3234,7 @@ static int grow_backup(struct mdinfo *sra,
                else
                        lseek64(destfd[i], destoffsets[i], 0);
 
-       rv = save_stripes(sources, offsets, 
+       rv = save_stripes(sources, offsets,
                          disks, chunk, level, layout,
                          dests, destfd,
                          offset*512*odata, stripes * chunk * odata,
@@ -3050,11 +3282,11 @@ static int grow_backup(struct mdinfo *sra,
  * every works.
  */
 /* FIXME return value is often ignored */
-static int forget_backup(
-               int dests, int *destfd, unsigned long long *destoffsets,
-               int part)
+static int forget_backup(int dests, int *destfd,
+                        unsigned long long *destoffsets,
+                        int part)
 {
-       /* 
+       /*
         * Erase backup 'part' (which is 0 or 1)
         */
        int i;
@@ -3078,7 +3310,7 @@ static int forget_backup(
                if ((unsigned long long)lseek64(destfd[i], destoffsets[i]-4096, 0) !=
                    destoffsets[i]-4096)
                        rv = -1;
-               if (rv == 0 && 
+               if (rv == 0 &&
                    write(destfd[i], &bsb, 512) != 512)
                        rv = -1;
                fsync(destfd[i]);
@@ -3114,7 +3346,7 @@ static void validate(int afd, int bfd, unsigned long long offset)
                fail("magic is bad");
        if (memcmp(bsb2.magic, "md_backup_data-2", 16) == 0 &&
            bsb2.sb_csum2 != bsb_csum((char*)&bsb2,
-                                    ((char*)&bsb2.sb_csum2)-((char*)&bsb2)))
+                                     ((char*)&bsb2.sb_csum2)-((char*)&bsb2)))
                fail("second csum bad");
 
        if (__le64_to_cpu(bsb2.devstart)*512 != offset)
@@ -3144,7 +3376,7 @@ static void validate(int afd, int bfd, unsigned long long offset)
                if ((unsigned long long)read(afd, abuf, len) != len)
                        fail("read first from array failed");
                if (memcmp(bbuf, abuf, len) != 0) {
-                       #if 0
+#if 0
                        int i;
                        printf("offset=%llu len=%llu\n",
                               (unsigned long long)__le64_to_cpu(bsb2.arraystart)*512, len);
@@ -3153,7 +3385,7 @@ static void validate(int afd, int bfd, unsigned long long offset)
                                        printf("first diff byte %d\n", i);
                                        break;
                                }
-                       #endif
+#endif
                        fail("data1 compare failed");
                }
        }
@@ -3504,7 +3736,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                                nonew:
                                        if (verbose)
                                                fprintf(stderr, Name
-                  ": backup-metadata found on %s but is not needed\n", devname);
+                                                       ": backup-metadata found on %s but is not needed\n", devname);
                                        continue; /* No new data here */
                                }
                        } else {
@@ -3539,7 +3771,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                second_fail:
                        if (verbose)
                                fprintf(stderr, Name
-                    ": Failed to verify secondary backup-metadata block on %s\n",
+                                       ": Failed to verify secondary backup-metadata block on %s\n",
                                        devname);
                        continue; /* Cannot seek */
                }
@@ -3583,7 +3815,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
                        free(offsets);
                        return 1;
                }
-               
+
                if (bsb.magic[15] == '2' &&
                    restore_stripes(fdlist, offsets,
                                    info->array.raid_disks,
@@ -3712,8 +3944,6 @@ int Grow_continue_command(char *devname, int fd,
        char buf[40];
        int cfd = -1;
        int fd2 = -1;
-       char *ep;
-       unsigned long long position;
 
        dprintf("Grow continue from command line called for %s\n",
                devname);
@@ -3803,6 +4033,13 @@ int Grow_continue_command(char *devname, int fd,
                        mdstat = mdstat_by_subdev(array, container_dev);
                        if (!mdstat)
                                continue;
+                       if (mdstat->active == 0) {
+                               fprintf(stderr, Name ": Skipping inactive "
+                                       "array md%i.\n", mdstat->devnum);
+                               free_mdstat(mdstat);
+                               mdstat = NULL;
+                               continue;
+                       }
                        break;
                }
                if (!content) {
@@ -3841,28 +4078,8 @@ int Grow_continue_command(char *devname, int fd,
        /* verify that array under reshape is started from
         * correct position
         */
-       ret_val = sysfs_get_str(content, NULL, "sync_max", buf, 40);
-       if (ret_val <= 0) {
-               fprintf(stderr, Name
-                       ": cannot open verify reshape progress for %s (%i)\n",
-                       content->sys_name, ret_val);
-               ret_val = 1;
-               goto Grow_continue_command_exit;
-       }
-       dprintf(Name ": Read sync_max sysfs entry is: %s\n", buf);
-       position = strtoull(buf, &ep, 0);
-       if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' ')) {
-               fprintf(stderr, Name ": Fatal error: array reshape was"
-                       " not properly frozen\n");
-               ret_val = 1;
-               goto Grow_continue_command_exit;
-       }
-       position *= get_data_disks(map_name(pers, mdstat->level),
-                                  content->new_layout,
-                                  content->array.raid_disks);
-       if (position != content->reshape_progress) {
-               fprintf(stderr, Name ": Fatal error: array reshape was"
-                       " not properly frozen.\n");
+       if (verify_reshape_position(content,
+                                   map_name(pers, mdstat->level)) < 0) {
                ret_val = 1;
                goto Grow_continue_command_exit;
        }