mdadm: fix wrong condition for go to abort

[thirdparty/mdadm.git] / Manage.c
diff --git a/Manage.c b/Manage.c

index 2e602d74aa821ee17e70c5b00a735505616d3b1b..494cca9248e763ab1db475b3b7b7f75de9d7a339 100644 (file)
--- a/Manage.c
+++ b/Manage.c
@@ -216,9 +216,6 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                 return 1;
         }
  
-       /* If this is an mdmon managed array, just write 'inactive'
-        * to the array state and let mdmon clear up.
-        */
         strcpy(devnm, fd2devnm(fd));
         /* Get EXCL access first.  If this fails, then attempting
          * to stop is probably a bad idea.
@@ -235,13 +232,17 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                 container[0] = 0;
         close(fd);
         count = 5;
-       while (((fd = ((devnm[0] == '/')
+       while (((fd = ((devname[0] == '/')
                        ?open(devname, O_RDONLY|O_EXCL)
                        :open_dev_flags(devnm, O_RDONLY|O_EXCL))) < 0
                 || strcmp(fd2devnm(fd), devnm) != 0)
                && container[0]
                && mdmon_running(container)
                && count) {
+               /* Can't open, so something might be wrong.  However it
+                * is a container, so we might be racing with mdmon, so
+                * retry for a bit.
+                */
                 if (fd >= 0)
                         close(fd);
                 flush_mdmon(container);
@@ -255,6 +256,9 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                                devname);
                 return 1;
         }
+       /* If this is an mdmon managed array, just write 'inactive'
+        * to the array state and let mdmon clear up.
+        */
         if (mdi &&
             mdi->array.level > 0 &&
             is_subarray(mdi->text_version)) {
@@ -262,7 +266,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                 /* This is mdmon managed. */
                 close(fd);
  
-               /* As we have an O_EXCL open, any use of the device
+               /* As we had an O_EXCL open, any use of the device
                  * which blocks STOP_ARRAY is probably a transient use,
                  * so it is reasonable to retry for a while - 5 seconds.
                  */
@@ -316,7 +320,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                                                        devnm)) {
                                 if (verbose >= 0)
                                         pr_err("Cannot stop container %s: member %s still active\n",
-                                              devname, m->dev);
+                                              devname, m->devnm);
                                 free_mdstat(mds);
                                 rv = 1;
                                 goto out;
@@ -340,9 +344,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
             sysfs_attribute_available(mdi, NULL, "reshape_direction") &&
             sysfs_get_str(mdi, NULL, "sync_action", buf, 20) > 0 &&
             strcmp(buf, "reshape\n") == 0 &&
-           sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2 &&
-           sysfs_set_str(mdi, NULL, "sync_action", "frozen") == 0) {
-               /* Array is frozen */
+           sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2) {
                 unsigned long long position, curr;
                 unsigned long long chunk1, chunk2;
                 unsigned long long rddiv, chunkdiv;
@@ -353,12 +355,28 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                 int delay;
                 int scfd;
  
+               delay = 40;
+               while (rd1 > rd2 && delay > 0 &&
+                      sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) == 0) {
+                       /* must be in the critical section - wait a bit */
+                       delay -= 1;
+                       usleep(100000);
+               }
+
+               if (sysfs_set_str(mdi, NULL, "sync_action", "frozen") != 0)
+                       goto done;
+               /* Array is frozen */
+
                 rd1 -= mdi->array.level == 6 ? 2 : 1;
                 rd2 -= mdi->array.level == 6 ? 2 : 1;
                 sysfs_get_str(mdi, NULL, "reshape_direction", buf, sizeof(buf));
                 if (strncmp(buf, "back", 4) == 0)
                         backwards = 1;
-               sysfs_get_ll(mdi, NULL, "reshape_position", &position);
+               if (sysfs_get_ll(mdi, NULL, "reshape_position", &position) != 0) {
+                       /* reshape must have finished now */
+                       sysfs_set_str(mdi, NULL, "sync_action", "idle");
+                       goto done;
+               }
                 sysfs_get_two(mdi, NULL, "chunk_size", &chunk1, &chunk2);
                 chunk1 /= 512;
                 chunk2 /= 512;
@@ -375,9 +393,20 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                         size &= ~(chunk1-1);
                         size &= ~(chunk2-1);
                         /* rd1 must be smaller */
+                       /* Reshape may have progressed further backwards than
+                        * recorded, so target even further back (hence "-1")
+                        */
                         position = (position / sectors - 1) * sectors;
+                       /* rd1 is always the conversion factor between 'sync'
+                        * position and 'reshape' position.
+                        * We read 1 "new" stripe worth of data from wherever,
+                        * and then write out that full stripe.
+                        */
                         sync_max = size - position/rd1;
                 } else {
+                       /* Reshape will very likely be beyond position, and it may
+                        * be too late to stop at '+1', so aim for '+2'
+                        */
                         position = (position / sectors + 2) * sectors;
                         sync_max = position/rd1;
                 }
@@ -400,6 +429,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                 delay = 3000;
                 scfd = sysfs_open(mdi->sys_name, NULL, "sync_completed");
                 while (scfd >= 0 && delay > 0 && old_sync_max > 0) {
+                       unsigned long long max_completed;
                         sysfs_get_ll(mdi, NULL, "reshape_position", &curr);
                         sysfs_fd_get_str(scfd, buf, sizeof(buf));
                         if (strncmp(buf, "none", 4) == 0) {
@@ -413,7 +443,10 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                                         break;
                         }
  
-                       if (sysfs_fd_get_ll(scfd, &completed) == 0 &&
+                       if (sysfs_fd_get_two(scfd, &completed,
+                                            &max_completed) == 2 &&
+                           /* 'completed' sometimes reads as max-uulong */
+                           completed < max_completed &&
                             (completed > sync_max ||
                              (completed == sync_max && curr != position))) {
                                 while (completed > sync_max) {
@@ -437,6 +470,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                         close(scfd);
  
         }
+done:
  
         /* As we have an O_EXCL open, any use of the device
          * which blocks STOP_ARRAY is probably a transient use,
@@ -690,7 +724,8 @@ skip_re_add:
  int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                struct supertype *tst, mdu_array_info_t *array,
                int force, int verbose, char *devname,
-              char *update, unsigned long rdev, unsigned long long array_size)
+              char *update, unsigned long rdev, unsigned long long array_size,
+              int raid_slot)
  {
         unsigned long long ldsize;
         struct supertype *dev_st = NULL;
@@ -880,7 +915,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
         }
         disc.major = major(rdev);
         disc.minor = minor(rdev);
-       disc.number =j;
+       if (raid_slot < 0)
+               disc.number = j;
+       else
+               disc.number = raid_slot;
         disc.state = 0;
         if (array->not_persistent==0) {
                 int dfd;
@@ -921,6 +959,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                         }
                 free(used);
         }
+
+       if (array->state & (1 << MD_SB_CLUSTERED)) {
+               if (dv->disposition == 'c')
+                       disc.state |= (1 << MD_DISK_CANDIDATE);
+               else
+                       disc.state |= (1 << MD_DISK_CLUSTER_ADD);
+       }
+
         if (dv->writemostly == 1)
                 disc.state |= (1 << MD_DISK_WRITEMOSTLY);
         if (tst->ss->external) {
@@ -1240,6 +1286,7 @@ int Manage_subdevs(char *devname, int fd,
          *        variant on 'A'
          *  'F' - Another variant of 'A', where the device was faulty
          *        so must be removed from the array first.
+        *  'c' - confirm the device as found (for clustered environments)
          *
          * For 'f' and 'r', the device can also be a kernel-internal
          * name such as 'sdb'.
@@ -1255,6 +1302,7 @@ int Manage_subdevs(char *devname, int fd,
         struct mdinfo info;
         int frozen = 0;
         int busy = 0;
+       int raid_slot = -1;
  
         if (ioctl(fd, GET_ARRAY_INFO, &array)) {
                 pr_err("Cannot get array info for %s\n",
@@ -1283,6 +1331,17 @@ int Manage_subdevs(char *devname, int fd,
                 int rv;
                 int mj,mn;
  
+               raid_slot = -1;
+               if (dv->disposition == 'c') {
+                       rv = parse_cluster_confirm_arg(dv->devname,
+                                                      &dv->devname,
+                                                      &raid_slot);
+                       if (rv) {
+                               pr_err("Could not get the devname of cluster\n");
+                               goto abort;
+                       }
+               }
+
                 if (strcmp(dv->devname, "failed") == 0 ||
                     strcmp(dv->devname, "faulty") == 0) {
                         if (dv->disposition != 'A'
@@ -1308,6 +1367,11 @@ int Manage_subdevs(char *devname, int fd,
                 if (strcmp(dv->devname, "missing") == 0) {
                         struct mddev_dev *add_devlist = NULL;
                         struct mddev_dev **dp;
+                       if (dv->disposition == 'c') {
+                               rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
+                               break;
+                       }
+
                         if (dv->disposition != 'A') {
                                 pr_err("'missing' only meaningful with --re-add\n");
                                 goto abort;
@@ -1438,6 +1502,7 @@ int Manage_subdevs(char *devname, int fd,
                 case 'A':
                 case 'M': /* --re-add missing */
                 case 'F': /* --re-add faulty  */
+               case 'c': /* --cluster-confirm */
                         /* add the device */
                         if (subarray) {
                                 pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
@@ -1471,7 +1536,7 @@ int Manage_subdevs(char *devname, int fd,
                         }
                         rv = Manage_add(fd, tfd, dv, tst, &array,
                                         force, verbose, devname, update,
-                                       rdev, array_size);
+                                       rdev, array_size, raid_slot);
                         close(tfd);
                         tfd = -1;
                         if (rv < 0)