git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Manage.c
mdadm: fix wrong condition for go to abort
[thirdparty/mdadm.git] / Manage.c
index 059cf5b8cc83cac15ae52b25d1312f9d0dcc956f..494cca9248e763ab1db475b3b7b7f75de9d7a339 100644 (file)
--- a/Manage.c
+++ b/Manage.c
@@ -211,15 +211,11 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
        if (md_get_version(fd) < 9000) {
                if (ioctl(fd, STOP_MD, 0) == 0)
                        return 0;
-               pr_err("stopping device %s "
-                      "failed: %s\n",
+               pr_err("stopping device %s failed: %s\n",
                       devname, strerror(errno));
                return 1;
        }
 
-       /* If this is an mdmon managed array, just write 'inactive'
-        * to the array state and let mdmon clear up.
-        */
        strcpy(devnm, fd2devnm(fd));
        /* Get EXCL access first.  If this fails, then attempting
         * to stop is probably a bad idea.
@@ -236,13 +232,17 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                container[0] = 0;
        close(fd);
        count = 5;
-       while (((fd = ((devnm[0] == '/')
+       while (((fd = ((devname[0] == '/')
                       ?open(devname, O_RDONLY|O_EXCL)
                       :open_dev_flags(devnm, O_RDONLY|O_EXCL))) < 0
                || strcmp(fd2devnm(fd), devnm) != 0)
               && container[0]
               && mdmon_running(container)
               && count) {
+               /* Can't open, so something might be wrong.  However it
+                * is a container, so we might be racing with mdmon, so
+                * retry for a bit.
+                */
                if (fd >= 0)
                        close(fd);
                flush_mdmon(container);
@@ -252,13 +252,13 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                if (fd >= 0)
                        close(fd);
                if (verbose >= 0)
-                       pr_err("Cannot get exclusive access to %s:"
-                              "Perhaps a running "
-                              "process, mounted filesystem "
-                              "or active volume group?\n",
+                       pr_err("Cannot get exclusive access to %s:Perhaps a running process, mounted filesystem or active volume group?\n",
                               devname);
                return 1;
        }
+       /* If this is an mdmon managed array, just write 'inactive'
+        * to the array state and let mdmon clear up.
+        */
        if (mdi &&
            mdi->array.level > 0 &&
            is_subarray(mdi->text_version)) {
@@ -266,7 +266,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                /* This is mdmon managed. */
                close(fd);
 
-               /* As we have an O_EXCL open, any use of the device
+               /* As we had an O_EXCL open, any use of the device
                 * which blocks STOP_ARRAY is probably a transient use,
                 * so it is reasonable to retry for a while - 5 seconds.
                 */
@@ -293,8 +293,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                fd = open_dev_excl(devnm);
                if (fd < 0) {
                        if (verbose >= 0)
-                               pr_err("failed to completely stop %s"
-                                      ": Device is busy\n",
+                               pr_err("failed to completely stop %s: Device is busy\n",
                                       devname);
                        rv = 1;
                        goto out;
@@ -320,9 +319,8 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                            metadata_container_matches(m->metadata_version+9,
                                                       devnm)) {
                                if (verbose >= 0)
-                                       pr_err("Cannot stop container %s: "
-                                              "member %s still active\n",
-                                              devname, m->dev);
+                                       pr_err("Cannot stop container %s: member %s still active\n",
+                                              devname, m->devnm);
                                free_mdstat(mds);
                                rv = 1;
                                goto out;
@@ -346,9 +344,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
            sysfs_attribute_available(mdi, NULL, "reshape_direction") &&
            sysfs_get_str(mdi, NULL, "sync_action", buf, 20) > 0 &&
            strcmp(buf, "reshape\n") == 0 &&
-           sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2 &&
-           sysfs_set_str(mdi, NULL, "sync_action", "frozen") == 0) {
-               /* Array is frozen */
+           sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2) {
                unsigned long long position, curr;
                unsigned long long chunk1, chunk2;
                unsigned long long rddiv, chunkdiv;
@@ -359,12 +355,28 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                int delay;
                int scfd;
 
+               delay = 40;
+               while (rd1 > rd2 && delay > 0 &&
+                      sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) == 0) {
+                       /* must be in the critical section - wait a bit */
+                       delay -= 1;
+                       usleep(100000);
+               }
+
+               if (sysfs_set_str(mdi, NULL, "sync_action", "frozen") != 0)
+                       goto done;
+               /* Array is frozen */
+
                rd1 -= mdi->array.level == 6 ? 2 : 1;
                rd2 -= mdi->array.level == 6 ? 2 : 1;
                sysfs_get_str(mdi, NULL, "reshape_direction", buf, sizeof(buf));
                if (strncmp(buf, "back", 4) == 0)
                        backwards = 1;
-               sysfs_get_ll(mdi, NULL, "reshape_position", &position);
+               if (sysfs_get_ll(mdi, NULL, "reshape_position", &position) != 0) {
+                       /* reshape must have finished now */
+                       sysfs_set_str(mdi, NULL, "sync_action", "idle");
+                       goto done;
+               }
                sysfs_get_two(mdi, NULL, "chunk_size", &chunk1, &chunk2);
                chunk1 /= 512;
                chunk2 /= 512;
@@ -381,9 +393,20 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                        size &= ~(chunk1-1);
                        size &= ~(chunk2-1);
                        /* rd1 must be smaller */
+                       /* Reshape may have progressed further backwards than
+                        * recorded, so target even further back (hence "-1")
+                        */
                        position = (position / sectors - 1) * sectors;
+                       /* rd1 is always the conversion factor between 'sync'
+                        * position and 'reshape' position.
+                        * We read 1 "new" stripe worth of data from wherever,
+                        * and then write out that full stripe.
+                        */
                        sync_max = size - position/rd1;
                } else {
+                       /* Reshape will very likely be beyond position, and it may
+                        * be too late to stop at '+1', so aim for '+2'
+                        */
                        position = (position / sectors + 2) * sectors;
                        sync_max = position/rd1;
                }
@@ -406,6 +429,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                delay = 3000;
                scfd = sysfs_open(mdi->sys_name, NULL, "sync_completed");
                while (scfd >= 0 && delay > 0 && old_sync_max > 0) {
+                       unsigned long long max_completed;
                        sysfs_get_ll(mdi, NULL, "reshape_position", &curr);
                        sysfs_fd_get_str(scfd, buf, sizeof(buf));
                        if (strncmp(buf, "none", 4) == 0) {
@@ -419,7 +443,10 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                                        break;
                        }
 
-                       if (sysfs_fd_get_ll(scfd, &completed) == 0 &&
+                       if (sysfs_fd_get_two(scfd, &completed,
+                                            &max_completed) == 2 &&
+                           /* 'completed' sometimes reads as max-uulong */
+                           completed < max_completed &&
                            (completed > sync_max ||
                             (completed == sync_max && curr != position))) {
                                while (completed > sync_max) {
@@ -443,6 +470,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                        close(scfd);
 
        }
+done:
 
        /* As we have an O_EXCL open, any use of the device
         * which blocks STOP_ARRAY is probably a transient use,
@@ -460,9 +488,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                        pr_err("failed to stop array %s: %s\n",
                               devname, strerror(errno));
                        if (errno == EBUSY)
-                               cont_err("Perhaps a running "
-                                        "process, mounted filesystem "
-                                        "or active volume group?\n");
+                               cont_err("Perhaps a running process, mounted filesystem or active volume group?\n");
                }
                rv = 1;
                goto out;
@@ -652,8 +678,7 @@ int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
                        int rv = -1;
                        tfd = dev_open(dv->devname, O_RDWR);
                        if (tfd < 0) {
-                               pr_err("failed to open %s for"
-                                      " superblock update during re-add\n", dv->devname);
+                               pr_err("failed to open %s for superblock update during re-add\n", dv->devname);
                                return -1;
                        }
 
@@ -673,8 +698,7 @@ int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
                                rv = dev_st->ss->store_super(dev_st, tfd);
                        close(tfd);
                        if (rv != 0) {
-                               pr_err("failed to update"
-                                      " superblock during re-add\n");
+                               pr_err("failed to update superblock during re-add\n");
                                return -1;
                        }
                }
@@ -700,7 +724,8 @@ skip_re_add:
 int Manage_add(int fd, int tfd, struct mddev_dev *dv,
               struct supertype *tst, mdu_array_info_t *array,
               int force, int verbose, char *devname,
-              char *update, unsigned long rdev, unsigned long long array_size)
+              char *update, unsigned long rdev, unsigned long long array_size,
+              int raid_slot)
 {
        unsigned long long ldsize;
        struct supertype *dev_st = NULL;
@@ -717,17 +742,13 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
        if (tst->ss == &super0 && ldsize > 4ULL*1024*1024*1024*1024) {
                /* More than 4TB is wasted on v0.90 */
                if (!force) {
-                       pr_err("%s is larger than %s can "
-                              "effectively use.\n"
-                              "       Add --force is you "
-                              "really want to add this device.\n",
+                       pr_err("%s is larger than %s can effectively use.\n"
+                              "       Add --force is you really want to add this device.\n",
                               dv->devname, devname);
                        return -1;
                }
-               pr_err("%s is larger than %s can "
-                      "effectively use.\n"
-                      "       Adding anyway as --force "
-                      "was given.\n",
+               pr_err("%s is larger than %s can effectively use.\n"
+                      "       Adding anyway as --force was given.\n",
                       dv->devname, devname);
        }
        if (!tst->ss->external &&
@@ -795,7 +816,8 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                }
 
                /* Make sure device is large enough */
-               if (tst->ss->avail_size(tst, ldsize/512, INVALID_SECTORS) <
+               if (tst->sb &&
+                   tst->ss->avail_size(tst, ldsize/512, INVALID_SECTORS) <
                    array_size) {
                        if (dv->disposition == 'M')
                                return 0;
@@ -841,16 +863,16 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                        int d;
                        int found = 0;
 
-                       for (d = 0; d < MAX_DISKS && found < array->active_disks; d++) {
+                       for (d = 0; d < MAX_DISKS && found < array->nr_disks; d++) {
                                disc.number = d;
                                if (ioctl(fd, GET_DISK_INFO, &disc))
                                        continue;
                                if (disc.major == 0 && disc.minor == 0)
                                        continue;
+                               found++;
                                if (!(disc.state & (1<<MD_DISK_SYNC)))
                                        continue;
                                avail[disc.raid_disk] = 1;
-                               found++;
                        }
                        array_failed = !enough(array->level, array->raid_disks,
                                               array->layout, 1, avail);
@@ -893,7 +915,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
        }
        disc.major = major(rdev);
        disc.minor = minor(rdev);
-       disc.number =j;
+       if (raid_slot < 0)
+               disc.number = j;
+       else
+               disc.number = raid_slot;
        disc.state = 0;
        if (array->not_persistent==0) {
                int dfd;
@@ -934,6 +959,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                        }
                free(used);
        }
+
+       if (array->state & (1 << MD_SB_CLUSTERED)) {
+               if (dv->disposition == 'c')
+                       disc.state |= (1 << MD_DISK_CANDIDATE);
+               else
+                       disc.state |= (1 << MD_DISK_CLUSTER_ADD);
+       }
+
        if (dv->writemostly == 1)
                disc.state |= (1 << MD_DISK_WRITEMOSTLY);
        if (tst->ss->external) {
@@ -949,8 +982,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 
                container_fd = open_dev_excl(devnm);
                if (container_fd < 0) {
-                       pr_err("add failed for %s:"
-                              " could not get exclusive access to container\n",
+                       pr_err("add failed for %s: could not get exclusive access to container\n",
                               dv->devname);
                        tst->ss->free_super(tst);
                        return -1;
@@ -989,8 +1021,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                 * would block add_disk */
                tst->ss->free_super(tst);
                if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
-                       pr_err("add new device to external metadata"
-                              " failed for %s\n", dv->devname);
+                       pr_err("add new device to external metadata failed for %s\n", dv->devname);
                        close(container_fd);
                        sysfs_free(sra);
                        return -1;
@@ -1032,8 +1063,7 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
                strcpy(devnm, fd2devnm(fd));
                lfd = open_dev_excl(devnm);
                if (lfd < 0) {
-                       pr_err("Cannot get exclusive access "
-                              " to container - odd\n");
+                       pr_err("Cannot get exclusive access  to container - odd\n");
                        return -1;
                }
                /* We may not be able to check on holders in
@@ -1093,8 +1123,7 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
                }
        }
        if (err) {
-               pr_err("hot remove failed "
-                      "for %s: %s\n",  dv->devname,
+               pr_err("hot remove failed for %s: %s\n",        dv->devname,
                       strerror(errno));
                if (lfd >= 0)
                        close(lfd);
@@ -1257,6 +1286,7 @@ int Manage_subdevs(char *devname, int fd,
         *        variant on 'A'
         *  'F' - Another variant of 'A', where the device was faulty
         *        so must be removed from the array first.
+        *  'c' - confirm the device as found (for clustered environments)
         *
         * For 'f' and 'r', the device can also be a kernel-internal
         * name such as 'sdb'.
@@ -1264,7 +1294,6 @@ int Manage_subdevs(char *devname, int fd,
        mdu_array_info_t array;
        unsigned long long array_size;
        struct mddev_dev *dv;
-       struct stat stb;
        int tfd = -1;
        struct supertype *tst;
        char *subarray = NULL;
@@ -1273,6 +1302,7 @@ int Manage_subdevs(char *devname, int fd,
        struct mdinfo info;
        int frozen = 0;
        int busy = 0;
+       int raid_slot = -1;
 
        if (ioctl(fd, GET_ARRAY_INFO, &array)) {
                pr_err("Cannot get array info for %s\n",
@@ -1296,16 +1326,27 @@ int Manage_subdevs(char *devname, int fd,
                goto abort;
        }
 
-       stb.st_rdev = 0;
        for (dv = devlist; dv; dv = dv->next) {
+               unsigned long rdev = 0; /* device to add/remove etc */
                int rv;
+               int mj,mn;
+
+               raid_slot = -1;
+               if (dv->disposition == 'c') {
+                       rv = parse_cluster_confirm_arg(dv->devname,
+                                                      &dv->devname,
+                                                      &raid_slot);
+                       if (rv) {
+                               pr_err("Could not get the devname of cluster\n");
+                               goto abort;
+                       }
+               }
 
                if (strcmp(dv->devname, "failed") == 0 ||
                    strcmp(dv->devname, "faulty") == 0) {
                        if (dv->disposition != 'A'
                            && dv->disposition != 'r') {
-                               pr_err("%s only meaningful "
-                                       "with -r or --re-add, not -%c\n",
+                               pr_err("%s only meaningful with -r or --re-add, not -%c\n",
                                        dv->devname, dv->disposition);
                                goto abort;
                        }
@@ -1315,8 +1356,7 @@ int Manage_subdevs(char *devname, int fd,
                }
                if (strcmp(dv->devname, "detached") == 0) {
                        if (dv->disposition != 'r' && dv->disposition != 'f') {
-                               pr_err("%s only meaningful "
-                                       "with -r of -f, not -%c\n",
+                               pr_err("%s only meaningful with -r of -f, not -%c\n",
                                        dv->devname, dv->disposition);
                                goto abort;
                        }
@@ -1327,9 +1367,13 @@ int Manage_subdevs(char *devname, int fd,
                if (strcmp(dv->devname, "missing") == 0) {
                        struct mddev_dev *add_devlist = NULL;
                        struct mddev_dev **dp;
+                       if (dv->disposition == 'c') {
+                               rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
+                               break;
+                       }
+
                        if (dv->disposition != 'A') {
-                               pr_err("'missing' only meaningful "
-                                      "with --re-add\n");
+                               pr_err("'missing' only meaningful with --re-add\n");
                                goto abort;
                        }
                        add_devlist = conf_get_devs();
@@ -1381,8 +1425,7 @@ int Manage_subdevs(char *devname, int fd,
                        int found = 0;
                        char dname[55];
                        if (dv->disposition != 'r' && dv->disposition != 'f') {
-                               pr_err("%s only meaningful "
-                                       "with -r or -f, not -%c\n",
+                               pr_err("%s only meaningful with -r or -f, not -%c\n",
                                        dv->devname, dv->disposition);
                                goto abort;
                        }
@@ -1391,10 +1434,9 @@ int Manage_subdevs(char *devname, int fd,
                        sysfd = sysfs_open(fd2devnm(fd), dname, "block/dev");
                        if (sysfd >= 0) {
                                char dn[20];
-                               int mj,mn;
                                if (sysfs_fd_get_str(sysfd, dn, 20) > 0 &&
                                    sscanf(dn, "%d:%d", &mj,&mn) == 2) {
-                                       stb.st_rdev = makedev(mj,mn);
+                                       rdev = makedev(mj,mn);
                                        found = 1;
                                }
                                close(sysfd);
@@ -1403,13 +1445,19 @@ int Manage_subdevs(char *devname, int fd,
                        if (!found) {
                                sysfd = sysfs_open(fd2devnm(fd), dname, "state");
                                if (sysfd < 0) {
-                                       pr_err("%s does not appear "
-                                               "to be a component of %s\n",
+                                       pr_err("%s does not appear to be a component of %s\n",
                                                dv->devname, devname);
                                        goto abort;
                                }
                        }
+               } else if ((dv->disposition == 'r' || dv->disposition == 'f')
+                          && get_maj_min(dv->devname, &mj, &mn)) {
+                       /* for 'fail' and 'remove', the device might
+                        * not exist.
+                        */
+                       rdev = makedev(mj, mn);
                } else {
+                       struct stat stb;
                        tfd = dev_open(dv->devname, O_RDONLY);
                        if (tfd >= 0)
                                fstat(tfd, &stb);
@@ -1442,6 +1490,7 @@ int Manage_subdevs(char *devname, int fd,
                                        goto abort;
                                }
                        }
+                       rdev = stb.st_rdev;
                }
                switch(dv->disposition){
                default:
@@ -1453,17 +1502,15 @@ int Manage_subdevs(char *devname, int fd,
                case 'A':
                case 'M': /* --re-add missing */
                case 'F': /* --re-add faulty  */
+               case 'c': /* --cluster-confirm */
                        /* add the device */
                        if (subarray) {
-                               pr_err("Cannot add disks to a"
-                                       " \'member\' array, perform this"
-                                       " operation on the parent container\n");
+                               pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
                                goto abort;
                        }
                        if (dv->disposition == 'F')
                                /* Need to remove first */
-                               ioctl(fd, HOT_REMOVE_DISK,
-                                     (unsigned long)stb.st_rdev);
+                               ioctl(fd, HOT_REMOVE_DISK, rdev);
                        /* Make sure it isn't in use (in 2.6 or later) */
                        tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
                        if (tfd >= 0) {
@@ -1489,7 +1536,7 @@ int Manage_subdevs(char *devname, int fd,
                        }
                        rv = Manage_add(fd, tfd, dv, tst, &array,
                                        force, verbose, devname, update,
-                                       stb.st_rdev, array_size);
+                                       rdev, array_size, raid_slot);
                        close(tfd);
                        tfd = -1;
                        if (rv < 0)
@@ -1501,13 +1548,11 @@ int Manage_subdevs(char *devname, int fd,
                case 'r':
                        /* hot remove */
                        if (subarray) {
-                               pr_err("Cannot remove disks from a"
-                                       " \'member\' array, perform this"
-                                       " operation on the parent container\n");
+                               pr_err("Cannot remove disks from a \'member\' array, perform this operation on the parent container\n");
                                rv = -1;
                        } else
                                rv = Manage_remove(tst, fd, dv, sysfd,
-                                                  stb.st_rdev, verbose,
+                                                  rdev, verbose,
                                                   devname);
                        if (sysfd >= 0)
                                close(sysfd);
@@ -1522,7 +1567,7 @@ int Manage_subdevs(char *devname, int fd,
                        /* FIXME check current member */
                        if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) ||
                            (sysfd < 0 && ioctl(fd, SET_DISK_FAULTY,
-                                               (unsigned long) stb.st_rdev))) {
+                                               rdev))) {
                                if (errno == EBUSY)
                                        busy = 1;
                                pr_err("set device faulty failed for %s:  %s\n",
@@ -1541,9 +1586,7 @@ int Manage_subdevs(char *devname, int fd,
                        break;
                case 'R': /* Mark as replaceable */
                        if (subarray) {
-                               pr_err("Cannot replace disks in a"
-                                       " \'member\' array, perform this"
-                                       " operation on the parent container\n");
+                               pr_err("Cannot replace disks in a \'member\' array, perform this operation on the parent container\n");
                                rv = -1;
                        } else {
                                if (!frozen) {
@@ -1553,7 +1596,7 @@ int Manage_subdevs(char *devname, int fd,
                                                frozen = -1;
                                }
                                rv = Manage_replace(tst, fd, dv,
-                                                   stb.st_rdev, verbose,
+                                                   rdev, verbose,
                                                    devname);
                        }
                        if (rv < 0)
@@ -1567,7 +1610,7 @@ int Manage_subdevs(char *devname, int fd,
                        goto abort;
                case 'w': /* --with device which was matched */
                        rv = Manage_with(tst, fd, dv,
-                                        stb.st_rdev, verbose, devname);
+                                        rdev, verbose, devname);
                        if (rv < 0)
                                goto abort;
                        break;