]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Grow.c
FIX: Check correctly raid disks during reshape restart
[thirdparty/mdadm.git] / Grow.c
diff --git a/Grow.c b/Grow.c
index e321a391d4754d1e9e699f728085c758d2413ff4..9c630369a11f72061853c6caa8d8e9370330a40f 100644 (file)
--- a/Grow.c
+++ b/Grow.c
@@ -952,13 +952,17 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
                }
                if (info->array.raid_disks == 2 &&
                    info->new_level == 5) {
-                       if (info->delta_disks != UnSet &&
-                           info->delta_disks != 0)
-                               return "Cannot change number of disks "
-                                       "with RAID1->RAID5 conversion";
+
                        re->level = 5;
                        re->before.data_disks = 1;
-                       re->after.data_disks = 1;
+                       if (info->delta_disks != UnSet &&
+                           info->delta_disks != 0)
+                               re->after.data_disks = 1 + info->delta_disks;
+                       else
+                               re->after.data_disks = 1;
+                       if (re->after.data_disks < 1)
+                               return "Number of disks too small for RAID5";
+
                        re->before.layout = ALGORITHM_LEFT_SYMMETRIC;
                        info->array.chunk_size = 65536;
                        break;
@@ -1269,18 +1273,20 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
 
 static int reshape_array(char *container, int fd, char *devname,
                         struct supertype *st, struct mdinfo *info,
-                        int force, char *backup_file, int quiet, int forked,
+                        int force, struct mddev_dev *devlist,
+                        char *backup_file, int quiet, int forked,
                         int restart);
-static int reshape_container(char *container, int cfd, char *devname,
+static int reshape_container(char *container, char *devname,
                             struct supertype *st, 
                             struct mdinfo *info,
                             int force,
                             char *backup_file,
-                            int quiet);
+                            int quiet, int restart);
 
 int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                 long long size,
                 int level, char *layout_str, int chunksize, int raid_disks,
+                struct mddev_dev *devlist,
                 int force)
 {
        /* Make some changes in the shape of an array.
@@ -1311,6 +1317,9 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
        char container_buf[20];
        int cfd = -1;
 
+       struct mddev_dev *dv;
+       int added_disks;
+
        struct mdinfo info;
        struct mdinfo *sra;
 
@@ -1386,10 +1395,13 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
 
                if (mdmon_running(container_dev))
                        st->update_tail = &st->updates;
-       } 
+       }
 
+       added_disks = 0;
+       for (dv = devlist; dv; dv = dv->next)
+               added_disks++;
        if (raid_disks > array.raid_disks &&
-           array.spare_disks < (raid_disks - array.raid_disks) &&
+           array.spare_disks +added_disks < (raid_disks - array.raid_disks) &&
            !force) {
                fprintf(stderr,
                        Name ": Need %d spare%s to avoid degraded array,"
@@ -1397,7 +1409,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        "       Use --force to over-ride this check.\n",
                        raid_disks - array.raid_disks, 
                        raid_disks - array.raid_disks == 1 ? "" : "s", 
-                       array.spare_disks);
+                       array.spare_disks + added_disks);
                return 1;
        }
 
@@ -1496,13 +1508,17 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                ping_monitor(container);
        }
 
+       memset(&info, 0, sizeof(info));
        info.array = array;
        sysfs_init(&info, fd, NoMdDev);
        strcpy(info.text_version, sra->text_version);
        info.component_size = size*2;
        info.new_level = level;
        info.new_chunk = chunksize * 1024;
-       if (raid_disks)
+       if (info.array.level == LEVEL_CONTAINER) {
+               info.delta_disks = UnSet;
+               info.array.raid_disks = raid_disks;
+       } else if (raid_disks)
                info.delta_disks = raid_disks - info.array.raid_disks;
        else
                info.delta_disks = UnSet;
@@ -1578,8 +1594,8 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                 * number of devices (On-Line Capacity Expansion) must be
                 * performed at the level of the container
                 */
-               rv = reshape_container(container, fd, devname, st, &info,
-                                      force, backup_file, quiet);
+               rv = reshape_container(container, devname, st, &info,
+                                      force, backup_file, quiet, 0);
                frozen = 0;
        } else {
                /* get spare devices from external metadata
@@ -1607,7 +1623,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                }
                sync_metadata(st);
                rv = reshape_array(container, fd, devname, st, &info, force,
-                                  backup_file, quiet, 0, 0);
+                                  devlist, backup_file, quiet, 0, 0);
                frozen = 0;
        }
 release:
@@ -1618,7 +1634,7 @@ release:
 
 static int reshape_array(char *container, int fd, char *devname,
                         struct supertype *st, struct mdinfo *info,
-                        int force,
+                        int force, struct mddev_dev *devlist,
                         char *backup_file, int quiet, int forked,
                         int restart)
 {
@@ -1631,6 +1647,9 @@ static int reshape_array(char *container, int fd, char *devname,
        struct mdu_array_info_s array;
        char *c;
 
+       struct mddev_dev *dv;
+       int added_disks;
+
        int *fdlist;
        unsigned long long *offsets;
        int d;
@@ -1642,13 +1661,41 @@ static int reshape_array(char *container, int fd, char *devname,
        int done;
        struct mdinfo *sra = NULL;
 
-       msg = analyse_change(info, &reshape);
+       /* when reshaping a RAID0, the component_size might be zero.
+        * So try to fix that up.
+        */
+       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
+               dprintf("Cannot get array information.\n");
+               goto release;
+       }
+       if (array.level == 0 && info->component_size == 0) {
+               get_dev_size(fd, NULL, &array_size);
+               info->component_size = array_size / array.raid_disks;
+       }
+
+       if (info->reshape_active) {
+               int new_level = info->new_level;
+               info->new_level = UnSet;
+               info->array.raid_disks -= info->delta_disks;
+               msg = analyse_change(info, &reshape);
+               info->new_level = new_level;
+               info->array.raid_disks += info->delta_disks;
+               if (!restart)
+                       /* Make sure the array isn't read-only */
+                       ioctl(fd, RESTART_ARRAY_RW, 0);
+       } else
+               msg = analyse_change(info, &reshape);
        if (msg) {
                fprintf(stderr, Name ": %s\n", msg);
                goto release;
        }
-       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
-               dprintf("Cannot get array information.\n");
+       if (restart &&
+           (reshape.level != info->array.level ||
+            reshape.before.layout != info->array.layout ||
+            reshape.before.data_disks + reshape.parity
+            != info->array.raid_disks - info->delta_disks)) {
+               fprintf(stderr, Name ": reshape info is not in native format -"
+                       " cannot continue.\n");
                goto release;
        }
 
@@ -1664,24 +1711,42 @@ static int reshape_array(char *container, int fd, char *devname,
         * freeze_array and freeze_container.
         */
        sysfs_freeze_array(info);
+       /* Check we have enough spares to not be degraded */
+       added_disks = 0;
+       for (dv = devlist; dv ; dv=dv->next)
+               added_disks++;
        spares_needed = max(reshape.before.data_disks,
                            reshape.after.data_disks)
                + reshape.parity - array.raid_disks;
 
        if (!force &&
-           info->new_level > 1 &&
-           spares_needed > info->array.spare_disks) {
+           info->new_level > 1 && info->array.level > 1 &&
+           spares_needed > info->array.spare_disks + added_disks) {
                fprintf(stderr,
                        Name ": Need %d spare%s to avoid degraded array,"
                        " and only have %d.\n"
                        "       Use --force to over-ride this check.\n",
                        spares_needed,
                        spares_needed == 1 ? "" : "s", 
-                       info->array.spare_disks);
+                       info->array.spare_disks + added_disks);
+               goto release;
+       }
+       /* Check we have enough spares to not fail */
+       spares_needed = max(reshape.before.data_disks,
+                           reshape.after.data_disks)
+               - array.raid_disks;
+       if ((info->new_level > 1 || info->new_level == 0) &&
+           spares_needed > info->array.spare_disks +added_disks) {
+               fprintf(stderr,
+                       Name ": Need %d spare%s to create working array,"
+                       " and only have %d.\n",
+                       spares_needed,
+                       spares_needed == 1 ? "" : "s", 
+                       info->array.spare_disks + added_disks);
                goto release;
        }
 
-       if (reshape.level != info->array.level) {
+       if (reshape.level != array.level) {
                char *c = map_num(pers, reshape.level);
                int err;
                if (c == NULL)
@@ -1701,7 +1766,7 @@ static int reshape_array(char *container, int fd, char *devname,
                if (!quiet)
                        fprintf(stderr, Name ": level of %s changed to %s\n",
                                devname, c);    
-               orig_level = info->array.level;
+               orig_level = array.level;
                sysfs_freeze_array(info);
 
                if (reshape.level > 0 && st->ss->external) {
@@ -1724,6 +1789,15 @@ static int reshape_array(char *container, int fd, char *devname,
 
                if (info2) {
                        sysfs_init(info2, fd, st->devnum);
+                       /* When increasing number of devices, we need to set
+                        * new raid_disks before adding these, or they might
+                        * be rejected.
+                        */
+                       if (reshape.backup_blocks &&
+                           reshape.after.data_disks > reshape.before.data_disks)
+                               subarray_set_num(container, info2, "raid_disks",
+                                                reshape.after.data_disks +
+                                                reshape.parity);
                        for (d = info2->devs; d; d = d->next) {
                                if (d->disk.state == 0 &&
                                    d->disk.raid_disk >= 0) {
@@ -1736,6 +1810,13 @@ static int reshape_array(char *container, int fd, char *devname,
                        sysfs_free(info2);
                }
        }
+       /* We might have been given some devices to add to the
+        * array.  Now that the array has been changed to the right
+        * level and frozen, we can safely add them.
+        */
+       if (devlist)
+               Manage_subdevs(devname, fd, devlist, !quiet,
+                              0,NULL);
 
        if (reshape.backup_blocks == 0) {
                /* No restriping needed, but we might need to impose
@@ -1743,7 +1824,7 @@ static int reshape_array(char *container, int fd, char *devname,
                 */
                /* read current array info */
                if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
-                       dprintf("Canot get array information.\n");
+                       dprintf("Cannot get array information.\n");
                        goto release;
                }
                /* compare current array info with new values and if
@@ -1928,7 +2009,7 @@ started:
 
        sra->new_chunk = info->new_chunk;
 
-       if (info->reshape_active)
+       if (restart)
                sra->reshape_progress = info->reshape_progress;
        else {
                sra->reshape_progress = 0;
@@ -1944,7 +2025,7 @@ started:
                /* use SET_ARRAY_INFO but only if reshape hasn't started */
                ioctl(fd, GET_ARRAY_INFO, &array);
                array.raid_disks = reshape.after.data_disks + reshape.parity;
-               if (!info->reshape_active &&
+               if (!restart &&
                    ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
                        int err = errno;
 
@@ -1960,17 +2041,9 @@ started:
 
                        goto release;
                }
-       } else if (info->reshape_active && !st->ss->external) {
-               /* We don't need to set anything here for internal
-                * metadata, and for kernels before 2.6.38 we can
-                * fail if we try.
-                */
-       } else {
+       } else if (!restart) {
                /* set them all just in case some old 'new_*' value
                 * persists from some earlier problem.
-                * We even set them when restarting in the middle.  They will
-                * already be set in that case so this will be a no-op,
-                * but it is hard to tell the difference.
                 */
                int err = 0;
                if (sysfs_set_num(sra, NULL, "chunk_size", info->new_chunk) < 0)
@@ -1995,9 +2068,11 @@ started:
                }
        }
 
-       err = start_reshape(sra, (info->reshape_active && !st->ss->external));
+       err = start_reshape(sra, restart);
        if (err) {
-               fprintf(stderr, Name ": Cannot start reshape for %s\n",
+               fprintf(stderr, 
+                       Name ": Cannot %s reshape for %s\n",
+                       restart ? "continue" : "start",
                        devname);
                goto release;
        }
@@ -2134,19 +2209,21 @@ release:
        return 1;
 }
 
-int reshape_container(char *container, int cfd, char *devname,
+int reshape_container(char *container, char *devname,
                      struct supertype *st, 
                      struct mdinfo *info,
                      int force,
                      char *backup_file,
-                     int quiet)
+                     int quiet, int restart)
 {
        struct mdinfo *cc = NULL;
+       int rv = restart;
 
        /* component_size is not meaningful for a container,
         * so pass '-1' meaning 'no change'
         */
-       if (reshape_super(st, -1, info->new_level,
+       if (!restart &&
+           reshape_super(st, -1, info->new_level,
                          info->new_layout, info->new_chunk,
                          info->array.raid_disks, info->delta_disks,
                          backup_file, devname, quiet)) {
@@ -2179,9 +2256,12 @@ int reshape_container(char *container, int cfd, char *devname,
                 * reshape it.  reshape_array() will re-read the metadata
                 * so the next time through a different array should be
                 * ready for reshape.
+                * It is possible that the 'different' array will not
+                * be assembled yet.  In that case we simply exit.
+                * When it is assembled, the mdadm which assembles it
+                * will take over the reshape.
                 */
                struct mdinfo *content;
-               int rv;
                int fd;
                struct mdstat_ent *mdstat;
                char *adev;
@@ -2217,13 +2297,15 @@ int reshape_container(char *container, int cfd, char *devname,
                sysfs_init(content, fd, mdstat->devnum);
 
                rv = reshape_array(container, fd, adev, st,
-                                  content, force,
-                                  backup_file, quiet, 1, 0);
+                                  content, force, NULL,
+                                  backup_file, quiet, 1, restart);
                close(fd);
+               restart = 0;
                if (rv)
                        break;
        }
-       unfreeze(st);
+       if (!rv)
+               unfreeze(st);
        sysfs_free(cc);
        exit(0);
 }
@@ -2301,8 +2383,8 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
         *   1 if more data from backup_point - but only as far as suspend_point,
         *     should be backed up
         *   0 if things are progressing smoothly
-        *  -1 if the reshape is finished, either because it is all done,
-        *     or due to an error.
+        *  -1 if the reshape is finished because it is all done,
+        *  -2 if the reshape is finished due to an error.
         */
 
        int advancing = (reshape->after.data_disks
@@ -2854,7 +2936,7 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
                        continue;
                ok = st->ss->load_super(st, devfd, NULL);
                close(devfd);
-               if (ok >= 0)
+               if (ok == 0)
                        break;
        }
        if (!sd) {
@@ -2921,19 +3003,23 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
 
                /* Clear any backup region that is before 'here' */
                if (increasing) {
-                       if (reshape_completed >= (__le64_to_cpu(bsb.arraystart) +
+                       if (__le64_to_cpu(bsb.length) > 0 &&
+                           reshape_completed >= (__le64_to_cpu(bsb.arraystart) +
                                                  __le64_to_cpu(bsb.length)))
                                forget_backup(dests, destfd,
                                              destoffsets, 0);
-                       if (reshape_completed >= (__le64_to_cpu(bsb.arraystart2) +
+                       if (__le64_to_cpu(bsb.length2) > 0 &&
+                           reshape_completed >= (__le64_to_cpu(bsb.arraystart2) +
                                                  __le64_to_cpu(bsb.length2)))
                                forget_backup(dests, destfd,
                                              destoffsets, 1);
                } else {
-                       if (reshape_completed <= (__le64_to_cpu(bsb.arraystart)))
+                       if (__le64_to_cpu(bsb.length) > 0 &&
+                           reshape_completed <= (__le64_to_cpu(bsb.arraystart)))
                                forget_backup(dests, destfd,
                                              destoffsets, 0);
-                       if (reshape_completed <= (__le64_to_cpu(bsb.arraystart2)))
+                       if (__le64_to_cpu(bsb.length2) > 0 &&
+                           reshape_completed <= (__le64_to_cpu(bsb.arraystart2)))
                                forget_backup(dests, destfd,
                                              destoffsets, 1);
                }
@@ -2943,6 +3029,11 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
                                done = 1;
                        break;
                }
+               if (rv == 0 && increasing && !st->ss->external) {
+                       /* No longer need to monitor this reshape */
+                       done = 1;
+                       break;
+               }
 
                while (rv) {
                        unsigned long long offset;
@@ -3014,9 +3105,6 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
        unsigned long long  nstripe, ostripe;
        int ndata, odata;
 
-       if (info->new_level != info->array.level)
-               return 1; /* Cannot handle level changes (they are instantaneous) */
-
        odata = info->array.raid_disks - info->delta_disks - 1;
        if (info->array.level == 6) odata--; /* number of data disks */
        ndata = info->array.raid_disks - 1;
@@ -3327,16 +3415,30 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
        char *container = NULL;
        int err;
 
-       if (!st->ss->external) {
-               err = sysfs_set_str(info, NULL, "array_state", "readonly");
-               if (err)
-                       return err;
-       } else {
+       err = sysfs_set_str(info, NULL, "array_state", "readonly");
+       if (err)
+               return err;
+       if (st->ss->external) {
                fmt_devname(buf, st->container_dev);
                container = buf;
+               freeze(st);
+
+               if (!mdmon_running(st->container_dev))
+                       start_mdmon(st->container_dev);
+               ping_monitor_by_id(st->container_dev);
+
+
+               if (info->reshape_active == 2) {
+                       int cfd = open_dev(st->container_dev);
+                       if (cfd < 0)
+                               return 1;
+                       st->ss->load_container(st, cfd, container);
+                       close(cfd);
+                       return reshape_container(container, NULL,
+                                                st, info, 0, backup_file,
+                                                0, 1);
+               }
        }
        return reshape_array(container, mdfd, "array", st, info, 1,
-                            backup_file, 0, 0, 1);
+                            NULL, backup_file, 0, 0, 1);
 }
-
-