git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Manage.c
mdadm: Fixup more broken logical operator formatting
[thirdparty/mdadm.git] / Manage.c
index 17d66de9c08f29f9d5fd213c6ff80229da68337f..04b9398c2e4f1e76587de518966b0d557b9688ba 100644 (file)
--- a/Manage.c
+++ b/Manage.c
 #include "md_p.h"
 #include <ctype.h>
 
-#define REGISTER_DEV           _IO (MD_MAJOR, 1)
-#define START_MD               _IO (MD_MAJOR, 2)
-#define STOP_MD                        _IO (MD_MAJOR, 3)
-
 int Manage_ro(char *devname, int fd, int readonly)
 {
        /* switch to readonly or rw
@@ -40,17 +36,9 @@ int Manage_ro(char *devname, int fd, int readonly)
         * use RESTART_ARRAY_RW or STOP_ARRAY_RO
         *
         */
-       mdu_array_info_t array;
-#ifndef MDASSEMBLE
        struct mdinfo *mdi;
-#endif
        int rv = 0;
 
-       if (md_get_version(fd) < 9000) {
-               pr_err("need md driver version 0.90.0 or later\n");
-               return 1;
-       }
-#ifndef MDASSEMBLE
        /* If this is an externally-managed array, we need to modify the
         * metadata_version so that mdmon doesn't undo our change.
         */
@@ -94,10 +82,9 @@ int Manage_ro(char *devname, int fd, int readonly)
                }
                goto out;
        }
-#endif
-       if (ioctl(fd, GET_ARRAY_INFO, &array)) {
-               pr_err("%s does not appear to be active.\n",
-                       devname);
+
+       if (!md_array_active(fd)) {
+               pr_err("%s does not appear to be active.\n", devname);
                rv = 1;
                goto out;
        }
@@ -118,15 +105,10 @@ int Manage_ro(char *devname, int fd, int readonly)
                }
        }
 out:
-#ifndef MDASSEMBLE
-       if (mdi)
-               sysfs_free(mdi);
-#endif
+       sysfs_free(mdi);
        return rv;
 }
 
-#ifndef MDASSEMBLE
-
 static void remove_devices(char *devnm, char *path)
 {
        /*
@@ -177,10 +159,6 @@ int Manage_run(char *devname, int fd, struct context *c)
         */
        char nm[32], *nmp;
 
-       if (md_get_version(fd) < 9000) {
-               pr_err("need md driver version 0.90.0 or later\n");
-               return 1;
-       }
        nmp = fd2devnm(fd);
        if (!nmp) {
                pr_err("Cannot find %s in sysfs!!\n", devname);
@@ -208,17 +186,6 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
        if (will_retry && verbose == 0)
                verbose = -1;
 
-       if (md_get_version(fd) < 9000) {
-               if (ioctl(fd, STOP_MD, 0) == 0)
-                       return 0;
-               pr_err("stopping device %s failed: %s\n",
-                      devname, strerror(errno));
-               return 1;
-       }
-
-       /* If this is an mdmon managed array, just write 'inactive'
-        * to the array state and let mdmon clear up.
-        */
        strcpy(devnm, fd2devnm(fd));
        /* Get EXCL access first.  If this fails, then attempting
         * to stop is probably a bad idea.
@@ -237,11 +204,13 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
        count = 5;
        while (((fd = ((devname[0] == '/')
                       ?open(devname, O_RDONLY|O_EXCL)
-                      :open_dev_flags(devnm, O_RDONLY|O_EXCL))) < 0
-               || strcmp(fd2devnm(fd), devnm) != 0)
-              && container[0]
-              && mdmon_running(container)
-              && count) {
+                      :open_dev_flags(devnm, O_RDONLY|O_EXCL))) < 0 ||
+               strcmp(fd2devnm(fd), devnm) != 0) && container[0] &&
+              mdmon_running(container) && count) {
+               /* Can't open, so something might be wrong.  However it
+                * is a container, so we might be racing with mdmon, so
+                * retry for a bit.
+                */
                if (fd >= 0)
                        close(fd);
                flush_mdmon(container);
@@ -255,6 +224,9 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                               devname);
                return 1;
        }
+       /* If this is an mdmon managed array, just write 'inactive'
+        * to the array state and let mdmon clear up.
+        */
        if (mdi &&
            mdi->array.level > 0 &&
            is_subarray(mdi->text_version)) {
@@ -262,7 +234,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                /* This is mdmon managed. */
                close(fd);
 
-               /* As we have an O_EXCL open, any use of the device
+               /* As we had an O_EXCL open, any use of the device
                 * which blocks STOP_ARRAY is probably a transient use,
                 * so it is reasonable to retry for a while - 5 seconds.
                 */
@@ -270,8 +242,8 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                while (count &&
                       (err = sysfs_set_str(mdi, NULL,
                                            "array_state",
-                                           "inactive")) < 0
-                      && errno == EBUSY) {
+                                           "inactive")) < 0 &&
+                      errno == EBUSY) {
                        usleep(200000);
                        count--;
                }
@@ -340,9 +312,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
            sysfs_attribute_available(mdi, NULL, "reshape_direction") &&
            sysfs_get_str(mdi, NULL, "sync_action", buf, 20) > 0 &&
            strcmp(buf, "reshape\n") == 0 &&
-           sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2 &&
-           sysfs_set_str(mdi, NULL, "sync_action", "frozen") == 0) {
-               /* Array is frozen */
+           sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2) {
                unsigned long long position, curr;
                unsigned long long chunk1, chunk2;
                unsigned long long rddiv, chunkdiv;
@@ -353,12 +323,28 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                int delay;
                int scfd;
 
+               delay = 40;
+               while (rd1 > rd2 && delay > 0 &&
+                      sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) == 0) {
+                       /* must be in the critical section - wait a bit */
+                       delay -= 1;
+                       usleep(100000);
+               }
+
+               if (sysfs_set_str(mdi, NULL, "sync_action", "frozen") != 0)
+                       goto done;
+               /* Array is frozen */
+
                rd1 -= mdi->array.level == 6 ? 2 : 1;
                rd2 -= mdi->array.level == 6 ? 2 : 1;
                sysfs_get_str(mdi, NULL, "reshape_direction", buf, sizeof(buf));
                if (strncmp(buf, "back", 4) == 0)
                        backwards = 1;
-               sysfs_get_ll(mdi, NULL, "reshape_position", &position);
+               if (sysfs_get_ll(mdi, NULL, "reshape_position", &position) != 0) {
+                       /* reshape must have finished now */
+                       sysfs_set_str(mdi, NULL, "sync_action", "idle");
+                       goto done;
+               }
                sysfs_get_two(mdi, NULL, "chunk_size", &chunk1, &chunk2);
                chunk1 /= 512;
                chunk2 /= 512;
@@ -375,9 +361,20 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                        size &= ~(chunk1-1);
                        size &= ~(chunk2-1);
                        /* rd1 must be smaller */
+                       /* Reshape may have progressed further backwards than
+                        * recorded, so target even further back (hence "-1")
+                        */
                        position = (position / sectors - 1) * sectors;
+                       /* rd1 is always the conversion factor between 'sync'
+                        * position and 'reshape' position.
+                        * We read 1 "new" stripe worth of data from where-ever,
+                        * and then write out that full stripe.
+                        */
                        sync_max = size - position/rd1;
                } else {
+                       /* Reshape will very likely be beyond position, and it may
+                        * be too late to stop at '+1', so aim for '+2'
+                        */
                        position = (position / sectors + 2) * sectors;
                        sync_max = position/rd1;
                }
@@ -400,6 +397,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                delay = 3000;
                scfd = sysfs_open(mdi->sys_name, NULL, "sync_completed");
                while (scfd >= 0 && delay > 0 && old_sync_max > 0) {
+                       unsigned long long max_completed;
                        sysfs_get_ll(mdi, NULL, "reshape_position", &curr);
                        sysfs_fd_get_str(scfd, buf, sizeof(buf));
                        if (strncmp(buf, "none", 4) == 0) {
@@ -413,7 +411,10 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                                        break;
                        }
 
-                       if (sysfs_fd_get_ll(scfd, &completed) == 0 &&
+                       if (sysfs_fd_get_two(scfd, &completed,
+                                            &max_completed) == 2 &&
+                           /* 'completed' sometimes reads as max-uulong */
+                           completed < max_completed &&
                            (completed > sync_max ||
                             (completed == sync_max && curr != position))) {
                                while (completed > sync_max) {
@@ -437,15 +438,15 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                        close(scfd);
 
        }
+done:
 
        /* As we have an O_EXCL open, any use of the device
         * which blocks STOP_ARRAY is probably a transient use,
         * so it is reasonable to retry for a while - 5 seconds.
         */
        count = 25; err = 0;
-       while (count && fd >= 0
-              && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
-              && errno == EBUSY) {
+       while (count && fd >= 0 &&
+              (err = ioctl(fd, STOP_ARRAY, NULL)) < 0 && errno == EBUSY) {
                usleep(200000);
                count --;
        }
@@ -459,14 +460,17 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                rv = 1;
                goto out;
        }
-       /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
-        * was stopped, so We'll do it here just to be sure.  Drop any
-        * partitions as well...
-        */
-       if (fd >= 0)
-               ioctl(fd, BLKRRPART, 0);
-       if (mdi)
-               sysfs_uevent(mdi, "change");
+
+       if (get_linux_version() < 2006028) {
+               /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
+                * was stopped, so We'll do it here just to be sure.  Drop any
+                * partitions as well...
+                */
+               if (fd >= 0)
+                       ioctl(fd, BLKRRPART, 0);
+               if (mdi)
+                       sysfs_uevent(mdi, "change");
+       }
 
        if (devnm[0] && use_udev()) {
                struct map_ent *mp = map_by_devnm(&map, devnm);
@@ -479,8 +483,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
        map_remove(&map, devnm);
        map_unlock(&map);
 out:
-       if (mdi)
-               sysfs_free(mdi);
+       sysfs_free(mdi);
 
        return rv;
 }
@@ -504,14 +507,14 @@ static void add_faulty(struct mddev_dev *dv, int fd, char disp)
        int remaining_disks;
        int i;
 
-       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
+       if (md_get_array_info(fd, &array) != 0)
                return;
 
        remaining_disks = array.nr_disks;
        for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
                char buf[40];
                disk.number = i;
-               if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+               if (md_get_disk_info(fd, &disk) != 0)
                        continue;
                if (disk.major == 0 && disk.minor == 0)
                        continue;
@@ -530,7 +533,7 @@ static void add_detached(struct mddev_dev *dv, int fd, char disp)
        int remaining_disks;
        int i;
 
-       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
+       if (md_get_array_info(fd, &array) != 0)
                return;
 
        remaining_disks = array.nr_disks;
@@ -538,7 +541,7 @@ static void add_detached(struct mddev_dev *dv, int fd, char disp)
                char buf[40];
                int sfd;
                disk.number = i;
-               if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+               if (md_get_disk_info(fd, &disk) != 0)
                        continue;
                if (disk.major == 0 && disk.minor == 0)
                        continue;
@@ -567,7 +570,7 @@ static void add_set(struct mddev_dev *dv, int fd, char set_char)
        int copies, set;
        int i;
 
-       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
+       if (md_get_array_info(fd, &array) != 0)
                return;
        if (array.level != 10)
                return;
@@ -580,7 +583,7 @@ static void add_set(struct mddev_dev *dv, int fd, char set_char)
        for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
                char buf[40];
                disk.number = i;
-               if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+               if (md_get_disk_info(fd, &disk) != 0)
                        continue;
                if (disk.major == 0 && disk.minor == 0)
                        continue;
@@ -626,21 +629,34 @@ int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
                    get_linux_version() <= 2006018)
                        goto skip_re_add;
                disc.number = mdi.disk.number;
-               if (ioctl(fd, GET_DISK_INFO, &disc) != 0
-                   || disc.major != 0 || disc.minor != 0
-                       )
+               if (md_get_disk_info(fd, &disc) != 0 ||
+                   disc.major != 0 || disc.minor != 0)
                        goto skip_re_add;
                disc.major = major(rdev);
                disc.minor = minor(rdev);
                disc.number = mdi.disk.number;
                disc.raid_disk = mdi.disk.raid_disk;
                disc.state = mdi.disk.state;
-               if (dv->writemostly == 1)
+               if (array->state & (1 << MD_SB_CLUSTERED)) {
+                       /* extra flags are needed when adding to a cluster as
+                        * there are two cases to distinguish
+                        */
+                       if (dv->disposition == 'c')
+                               disc.state |= (1 << MD_DISK_CANDIDATE);
+                       else
+                               disc.state |= (1 << MD_DISK_CLUSTER_ADD);
+               }
+               if (dv->writemostly == FlagSet)
                        disc.state |= 1 << MD_DISK_WRITEMOSTLY;
-               if (dv->writemostly == 2)
+               if (dv->writemostly == FlagClear)
                        disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
+               if (dv->failfast == FlagSet)
+                       disc.state |= 1 << MD_DISK_FAILFAST;
+               if (dv->failfast == FlagClear)
+                       disc.state &= ~(1 << MD_DISK_FAILFAST);
                remove_partitions(tfd);
-               if (update || dv->writemostly > 0) {
+               if (update || dv->writemostly != FlagDefault ||
+                   dv->failfast != FlagDefault) {
                        int rv = -1;
                        tfd = dev_open(dv->devname, O_RDWR);
                        if (tfd < 0) {
@@ -648,14 +664,22 @@ int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
                                return -1;
                        }
 
-                       if (dv->writemostly == 1)
+                       if (dv->writemostly == FlagSet)
                                rv = dev_st->ss->update_super(
                                        dev_st, NULL, "writemostly",
                                        devname, verbose, 0, NULL);
-                       if (dv->writemostly == 2)
+                       if (dv->writemostly == FlagClear)
                                rv = dev_st->ss->update_super(
                                        dev_st, NULL, "readwrite",
                                        devname, verbose, 0, NULL);
+                       if (dv->failfast == FlagSet)
+                               rv = dev_st->ss->update_super(
+                                       dev_st, NULL, "failfast",
+                                       devname, verbose, 0, NULL);
+                       if (dv->failfast == FlagClear)
+                               rv = dev_st->ss->update_super(
+                                       dev_st, NULL, "nofailfast",
+                                       devname, verbose, 0, NULL);
                        if (update)
                                rv = dev_st->ss->update_super(
                                        dev_st, NULL, update,
@@ -690,10 +714,11 @@ skip_re_add:
 int Manage_add(int fd, int tfd, struct mddev_dev *dv,
               struct supertype *tst, mdu_array_info_t *array,
               int force, int verbose, char *devname,
-              char *update, unsigned long rdev, unsigned long long array_size)
+              char *update, unsigned long rdev, unsigned long long array_size,
+              int raid_slot)
 {
        unsigned long long ldsize;
-       struct supertype *dev_st = NULL;
+       struct supertype *dev_st;
        int j;
        mdu_disk_info_t disc;
 
@@ -716,9 +741,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                       "       Adding anyway as --force was given.\n",
                       dv->devname, devname);
        }
-       if (!tst->ss->external &&
-           array->major_version == 0 &&
-           md_get_version(fd)%100 < 2) {
+       if (!tst->ss->external && array->major_version == 0) {
                if (ioctl(fd, HOT_ADD_DISK, rdev)==0) {
                        if (verbose >= 0)
                                pr_err("hot added %s\n",
@@ -747,7 +770,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                                char *dev;
                                int dfd;
                                disc.number = j;
-                               if (ioctl(fd, GET_DISK_INFO, &disc))
+                               if (md_get_disk_info(fd, &disc))
                                        continue;
                                if (disc.major==0 && disc.minor==0)
                                        continue;
@@ -769,8 +792,8 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                                break;
                        }
                /* FIXME this is a bad test to be using */
-               if (!tst->sb && (dv->disposition != 'a'
-                                && dv->disposition != 'S')) {
+               if (!tst->sb && (dv->disposition != 'a' &&
+                                dv->disposition != 'S')) {
                        /* we are re-adding a device to a
                         * completely dead array - have to depend
                         * on kernel to check
@@ -781,7 +804,8 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                }
 
                /* Make sure device is large enough */
-               if (tst->sb &&
+               if (dv->disposition != 'j' &&  /* skip size check for Journal */
+                   tst->sb &&
                    tst->ss->avail_size(tst, ldsize/512, INVALID_SECTORS) <
                    array_size) {
                        if (dv->disposition == 'M')
@@ -797,20 +821,19 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                 * simply re-add it.
                 */
 
-               if (array->not_persistent==0) {
+               if (array->not_persistent == 0) {
                        dev_st = dup_super(tst);
                        dev_st->ss->load_super(dev_st, tfd, NULL);
-               }
-               if (dev_st && dev_st->sb && dv->disposition != 'S') {
-                       int rv = attempt_re_add(fd, tfd, dv,
-                                               dev_st, tst,
-                                               rdev,
-                                               update, devname,
-                                               verbose,
-                                               array);
-                       dev_st->ss->free_super(dev_st);
-                       if (rv)
-                               return rv;
+                       if (dev_st->sb && dv->disposition != 'S') {
+                               int rv;
+
+                               rv = attempt_re_add(fd, tfd, dv, dev_st, tst,
+                                                   rdev, update, devname,
+                                                   verbose, array);
+                               dev_st->ss->free_super(dev_st);
+                               if (rv)
+                                       return rv;
+                       }
                }
                if (dv->disposition == 'M') {
                        if (verbose > 0)
@@ -830,14 +853,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 
                        for (d = 0; d < MAX_DISKS && found < array->nr_disks; d++) {
                                disc.number = d;
-                               if (ioctl(fd, GET_DISK_INFO, &disc))
+                               if (md_get_disk_info(fd, &disc))
                                        continue;
                                if (disc.major == 0 && disc.minor == 0)
                                        continue;
-                               found++;
                                if (!(disc.state & (1<<MD_DISK_SYNC)))
                                        continue;
                                avail[disc.raid_disk] = 1;
+                               found++;
                        }
                        array_failed = !enough(array->level, array->raid_disks,
                                               array->layout, 1, avail);
@@ -871,7 +894,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
         */
        for (j = array->raid_disks; j < tst->max_devs; j++) {
                disc.number = j;
-               if (ioctl(fd, GET_DISK_INFO, &disc))
+               if (md_get_disk_info(fd, &disc))
                        break;
                if (disc.major==0 && disc.minor==0)
                        break;
@@ -880,12 +903,47 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
        }
        disc.major = major(rdev);
        disc.minor = minor(rdev);
-       disc.number =j;
+       if (raid_slot < 0)
+               disc.number = j;
+       else
+               disc.number = raid_slot;
        disc.state = 0;
+
+       /* only add journal to array that supports journaling */
+       if (dv->disposition == 'j') {
+               struct mdinfo mdi;
+               struct mdinfo *mdp;
+
+               mdp = sysfs_read(fd, NULL, GET_ARRAY_STATE);
+               if (!mdp) {
+                       pr_err("%s unable to read array state.\n", devname);
+                       return -1;
+               }
+
+               if (mdp->array_state != ARRAY_READONLY) {
+                       sysfs_free(mdp);
+                       pr_err("%s is not readonly, cannot add journal.\n", devname);
+                       return -1;
+               }
+
+               sysfs_free(mdp);
+
+               tst->ss->getinfo_super(tst, &mdi, NULL);
+               if (mdi.journal_device_required == 0) {
+                       pr_err("%s does not support journal device.\n", devname);
+                       return -1;
+               }
+               disc.raid_disk = 0;
+       }
+
        if (array->not_persistent==0) {
                int dfd;
-               if (dv->writemostly == 1)
+               if (dv->disposition == 'j')
+                       disc.state |= (1 << MD_DISK_JOURNAL) | (1 << MD_DISK_SYNC);
+               if (dv->writemostly == FlagSet)
                        disc.state |= 1 << MD_DISK_WRITEMOSTLY;
+               if (dv->failfast == FlagSet)
+                       disc.state |= 1 << MD_DISK_FAILFAST;
                dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
                if (tst->ss->add_to_super(tst, &disc, dfd,
                                          dv->devname, INVALID_SECTORS))
@@ -901,7 +959,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                for (j = 0; j < tst->max_devs; j++) {
                        mdu_disk_info_t disc2;
                        disc2.number = j;
-                       if (ioctl(fd, GET_DISK_INFO, &disc2))
+                       if (md_get_disk_info(fd, &disc2))
                                continue;
                        if (disc2.major==0 && disc2.minor==0)
                                continue;
@@ -921,8 +979,18 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                        }
                free(used);
        }
-       if (dv->writemostly == 1)
+
+       if (array->state & (1 << MD_SB_CLUSTERED)) {
+               if (dv->disposition == 'c')
+                       disc.state |= (1 << MD_DISK_CANDIDATE);
+               else
+                       disc.state |= (1 << MD_DISK_CLUSTER_ADD);
+       }
+
+       if (dv->writemostly == FlagSet)
                disc.state |= (1 << MD_DISK_WRITEMOSTLY);
+       if (dv->failfast == FlagSet)
+               disc.state |= (1 << MD_DISK_FAILFAST);
        if (tst->ss->external) {
                /* add a disk
                 * to an external metadata container */
@@ -986,10 +1054,20 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
        } else {
                tst->ss->free_super(tst);
                if (ioctl(fd, ADD_NEW_DISK, &disc)) {
-                       pr_err("add new device failed for %s as %d: %s\n",
-                              dv->devname, j, strerror(errno));
+                       if (dv->disposition == 'j')
+                               pr_err("Failed to hot add %s as journal, "
+                                      "please try restart %s.\n", dv->devname, devname);
+                       else
+                               pr_err("add new device failed for %s as %d: %s\n",
+                                      dv->devname, j, strerror(errno));
                        return -1;
                }
+               if (dv->disposition == 'j') {
+                       pr_err("Journal added successfully, making %s read-write\n", devname);
+                       if (Manage_ro(devname, fd, -1))
+                               pr_err("Failed to make %s read-write\n", devname);
+               }
+
        }
        if (verbose >= 0)
                pr_err("added %s\n", dv->devname);
@@ -997,7 +1075,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 }
 
 int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
-                 int sysfd, unsigned long rdev, int verbose, char *devname)
+                 int sysfd, unsigned long rdev, int force, int verbose, char *devname)
 {
        int lfd = -1;
        int err;
@@ -1029,19 +1107,34 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
                 */
                if (rdev == 0)
                        ret = -1;
-               else
-                       ret = sysfs_unique_holder(devnm, rdev);
-               if (ret == 0) {
-                       pr_err("%s is not a member, cannot remove.\n",
-                              dv->devname);
-                       close(lfd);
-                       return -1;
-               }
-               if (ret >= 2) {
-                       pr_err("%s is still in use, cannot remove.\n",
-                              dv->devname);
-                       close(lfd);
-                       return -1;
+               else {
+                       /*
+                        * The drive has already been set to 'faulty', however
+                        * monitor might not have had time to process it and the
+                        * drive might still have an entry in the 'holders'
+                        * directory. Try a few times to avoid a false error
+                        */
+                       int count = 20;
+
+                       do {
+                               ret = sysfs_unique_holder(devnm, rdev);
+                               if (ret < 2)
+                                       break;
+                               usleep(100 * 1000);     /* 100ms */
+                       } while (--count > 0);
+
+                       if (ret == 0) {
+                               pr_err("%s is not a member, cannot remove.\n",
+                                       dv->devname);
+                               close(lfd);
+                               return -1;
+                       }
+                       if (ret >= 2) {
+                               pr_err("%s is still in use, cannot remove.\n",
+                                       dv->devname);
+                               close(lfd);
+                               return -1;
+                       }
                }
        }
        /* FIXME check that it is a current member */
@@ -1049,13 +1142,9 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
                /* device has been removed and we don't know
                 * the major:minor number
                 */
-               int n = write(sysfd, "remove", 6);
-               if (n != 6)
-                       err = -1;
-               else
-                       err = 0;
+               err = sys_hot_remove_disk(sysfd, force);
        } else {
-               err = ioctl(fd, HOT_REMOVE_DISK, rdev);
+               err = hot_remove_disk(fd, rdev, force);
                if (err && errno == ENODEV) {
                        /* Old kernels rejected this if no personality
                         * is registered */
@@ -1072,8 +1161,7 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
                                                    "state", "remove");
                        else
                                err = -1;
-                       if (sra)
-                               sysfs_free(sra);
+                       sysfs_free(sra);
                }
        }
        if (err) {
@@ -1222,6 +1310,7 @@ int Manage_subdevs(char *devname, int fd,
         *         try HOT_ADD_DISK
         *         If that fails EINVAL, try ADD_NEW_DISK
         *  'S' - add the device as a spare - don't try re-add
+        *  'j' - add the device as a journal device
         *  'A' - re-add the device
         *  'r' - remove the device: HOT_REMOVE_DISK
         *        device can be 'faulty' or 'detached' in which case all
@@ -1240,6 +1329,7 @@ int Manage_subdevs(char *devname, int fd,
         *        variant on 'A'
         *  'F' - Another variant of 'A', where the device was faulty
         *        so must be removed from the array first.
+        *  'c' - confirm the device as found (for clustered environments)
         *
         * For 'f' and 'r', the device can also be a kernel-internal
         * name such as 'sdb'.
@@ -1253,16 +1343,20 @@ int Manage_subdevs(char *devname, int fd,
        int sysfd = -1;
        int count = 0; /* number of actions taken */
        struct mdinfo info;
+       struct mdinfo devinfo;
        int frozen = 0;
        int busy = 0;
+       int raid_slot = -1;
 
-       if (ioctl(fd, GET_ARRAY_INFO, &array)) {
-               pr_err("Cannot get array info for %s\n",
-                       devname);
+       if (sysfs_init(&info, fd, NULL)) {
+               pr_err("sysfs not availabile for %s\n", devname);
                goto abort;
        }
-       sysfs_init(&info, fd, NULL);
 
+       if (md_get_array_info(fd, &array)) {
+               pr_err("Cannot get array info for %s\n", devname);
+               goto abort;
+       }
        /* array.size is only 32 bits and may be truncated.
         * So read from sysfs if possible, and record number of sectors
         */
@@ -1283,10 +1377,20 @@ int Manage_subdevs(char *devname, int fd,
                int rv;
                int mj,mn;
 
+               raid_slot = -1;
+               if (dv->disposition == 'c') {
+                       rv = parse_cluster_confirm_arg(dv->devname,
+                                                      &dv->devname,
+                                                      &raid_slot);
+                       if (rv) {
+                               pr_err("Could not get the devname of cluster\n");
+                               goto abort;
+                       }
+               }
+
                if (strcmp(dv->devname, "failed") == 0 ||
                    strcmp(dv->devname, "faulty") == 0) {
-                       if (dv->disposition != 'A'
-                           && dv->disposition != 'r') {
+                       if (dv->disposition != 'A' && dv->disposition != 'r') {
                                pr_err("%s only meaningful with -r or --re-add, not -%c\n",
                                        dv->devname, dv->disposition);
                                goto abort;
@@ -1306,8 +1410,13 @@ int Manage_subdevs(char *devname, int fd,
                }
 
                if (strcmp(dv->devname, "missing") == 0) {
-                       struct mddev_dev *add_devlist = NULL;
+                       struct mddev_dev *add_devlist;
                        struct mddev_dev **dp;
+                       if (dv->disposition == 'c') {
+                               rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
+                               break;
+                       }
+
                        if (dv->disposition != 'A') {
                                pr_err("'missing' only meaningful with --re-add\n");
                                goto abort;
@@ -1386,30 +1495,24 @@ int Manage_subdevs(char *devname, int fd,
                                        goto abort;
                                }
                        }
-               } else if ((dv->disposition == 'r' || dv->disposition == 'f')
-                          && get_maj_min(dv->devname, &mj, &mn)) {
+               } else if ((dv->disposition == 'r' ||
+                           dv->disposition == 'f') &&
+                          get_maj_min(dv->devname, &mj, &mn)) {
                        /* for 'fail' and 'remove', the device might
                         * not exist.
                         */
                        rdev = makedev(mj, mn);
                } else {
-                       struct stat stb;
                        tfd = dev_open(dv->devname, O_RDONLY);
-                       if (tfd >= 0)
-                               fstat(tfd, &stb);
-                       else {
+                       if (tfd >= 0) {
+                               fstat_is_blkdev(tfd, dv->devname, &rdev);
+                               close(tfd);
+                       } else {
                                int open_err = errno;
-                               if (stat(dv->devname, &stb) != 0) {
-                                       pr_err("Cannot find %s: %s\n",
-                                              dv->devname, strerror(errno));
-                                       goto abort;
-                               }
-                               if ((stb.st_mode & S_IFMT) != S_IFBLK) {
+                               if (!stat_is_blkdev(dv->devname, &rdev)) {
                                        if (dv->disposition == 'M')
                                                /* non-fatal. Also improbable */
                                                continue;
-                                       pr_err("%s is not a block device.\n",
-                                              dv->devname);
                                        goto abort;
                                }
                                if (dv->disposition == 'r')
@@ -1426,7 +1529,6 @@ int Manage_subdevs(char *devname, int fd,
                                        goto abort;
                                }
                        }
-                       rdev = stb.st_rdev;
                }
                switch(dv->disposition){
                default:
@@ -1435,17 +1537,31 @@ int Manage_subdevs(char *devname, int fd,
                        goto abort;
                case 'a':
                case 'S': /* --add-spare */
+               case 'j': /* --add-journal */
                case 'A':
                case 'M': /* --re-add missing */
                case 'F': /* --re-add faulty  */
+               case 'c': /* --cluster-confirm */
                        /* add the device */
                        if (subarray) {
                                pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
                                goto abort;
                        }
+
+                       /* Let's first try to write re-add to sysfs */
+                       if (rdev != 0 &&
+                           (dv->disposition == 'A' || dv->disposition == 'F')) {
+                               sysfs_init_dev(&devinfo, rdev);
+                               if (sysfs_set_str(&info, &devinfo, "state", "re-add") == 0) {
+                                       pr_err("re-add %s to %s succeed\n",
+                                               dv->devname, info.sys_name);
+                                       break;
+                               }
+                       }
+
                        if (dv->disposition == 'F')
                                /* Need to remove first */
-                               ioctl(fd, HOT_REMOVE_DISK, rdev);
+                               hot_remove_disk(fd, rdev, force);
                        /* Make sure it isn't in use (in 2.6 or later) */
                        tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
                        if (tfd >= 0) {
@@ -1471,7 +1587,7 @@ int Manage_subdevs(char *devname, int fd,
                        }
                        rv = Manage_add(fd, tfd, dv, tst, &array,
                                        force, verbose, devname, update,
-                                       rdev, array_size);
+                                       rdev, array_size, raid_slot);
                        close(tfd);
                        tfd = -1;
                        if (rv < 0)
@@ -1487,7 +1603,7 @@ int Manage_subdevs(char *devname, int fd,
                                rv = -1;
                        } else
                                rv = Manage_remove(tst, fd, dv, sysfd,
-                                                  rdev, verbose,
+                                                  rdev, verbose, force,
                                                   devname);
                        if (sysfd >= 0)
                                close(sysfd);
@@ -1639,7 +1755,8 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
 
        devlist.next = NULL;
        devlist.used = 0;
-       devlist.writemostly = 0;
+       devlist.writemostly = FlagDefault;
+       devlist.failfast = FlagDefault;
        devlist.devname = devname;
        sprintf(devname, "%d:%d", major(devid), minor(devid));
 
@@ -1660,4 +1777,3 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
        close(fd2);
        return 0;
 }
-#endif