git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Manage.c
imsm: move expand verification code into new function
[thirdparty/mdadm.git] / Manage.c
index 8d1bbd9779810bcd86daa91d255e48a354c8fcdd..f54de7c685ef76425eaa86896cb254cd15fec2bf 100644 (file)
--- a/Manage.c
+++ b/Manage.c
@@ -1,7 +1,7 @@
 /*
  * mdadm - manage Linux "md" devices aka RAID arrays.
  *
- * Copyright (C) 2001-2012 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
  *
  *
  *    This program is free software; you can redistribute it and/or modify
 #include "md_p.h"
 #include <ctype.h>
 
-#define REGISTER_DEV           _IO (MD_MAJOR, 1)
-#define START_MD               _IO (MD_MAJOR, 2)
-#define STOP_MD                        _IO (MD_MAJOR, 3)
-
 int Manage_ro(char *devname, int fd, int readonly)
 {
        /* switch to readonly or rw
@@ -40,17 +36,9 @@ int Manage_ro(char *devname, int fd, int readonly)
         * use RESTART_ARRAY_RW or STOP_ARRAY_RO
         *
         */
-       mdu_array_info_t array;
-#ifndef MDASSEMBLE
        struct mdinfo *mdi;
-#endif
        int rv = 0;
 
-       if (md_get_version(fd) < 9000) {
-               pr_err("need md driver version 0.90.0 or later\n");
-               return 1;
-       }
-#ifndef MDASSEMBLE
        /* If this is an externally-managed array, we need to modify the
         * metadata_version so that mdmon doesn't undo our change.
         */
@@ -94,10 +82,9 @@ int Manage_ro(char *devname, int fd, int readonly)
                }
                goto out;
        }
-#endif
-       if (ioctl(fd, GET_ARRAY_INFO, &array)) {
-               pr_err("%s does not appear to be active.\n",
-                       devname);
+
+       if (!md_array_active(fd)) {
+               pr_err("%s does not appear to be active.\n", devname);
                rv = 1;
                goto out;
        }
@@ -118,15 +105,10 @@ int Manage_ro(char *devname, int fd, int readonly)
                }
        }
 out:
-#ifndef MDASSEMBLE
-       if (mdi)
-               sysfs_free(mdi);
-#endif
+       sysfs_free(mdi);
        return rv;
 }
 
-#ifndef MDASSEMBLE
-
 static void remove_devices(char *devnm, char *path)
 {
        /*
@@ -170,28 +152,20 @@ static void remove_devices(char *devnm, char *path)
        free(path2);
 }
 
-int Manage_run(char *devname, int fd, int verbose)
+int Manage_run(char *devname, int fd, struct context *c)
 {
        /* Run the array.  Array must already be configured
         *  Requires >= 0.90.0
         */
-       mdu_param_t param; /* unused */
-       int rv = 0;
-
-       if (md_get_version(fd) < 9000) {
-               pr_err("need md driver version 0.90.0 or later\n");
-               return 1;
-       }
+       char nm[32], *nmp;
 
-       if (ioctl(fd, RUN_ARRAY, &param)) {
-               if (verbose >= 0)
-                       pr_err("failed to run array %s: %s\n",
-                              devname, strerror(errno));
+       nmp = fd2devnm(fd);
+       if (!nmp) {
+               pr_err("Cannot find %s in sysfs!!\n", devname);
                return 1;
        }
-       if (verbose >= 0)
-               pr_err("started %s\n", devname);
-       return rv;
+       strcpy(nm, nmp);
+       return IncrementalScan(c, nm);
 }
 
 int Manage_stop(char *devname, int fd, int verbose, int will_retry)
@@ -212,18 +186,6 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
        if (will_retry && verbose == 0)
                verbose = -1;
 
-       if (md_get_version(fd) < 9000) {
-               if (ioctl(fd, STOP_MD, 0) == 0)
-                       return 0;
-               pr_err("stopping device %s "
-                      "failed: %s\n",
-                      devname, strerror(errno));
-               return 1;
-       }
-
-       /* If this is an mdmon managed array, just write 'inactive'
-        * to the array state and let mdmon clear up.
-        */
        strcpy(devnm, fd2devnm(fd));
        /* Get EXCL access first.  If this fails, then attempting
         * to stop is probably a bad idea.
@@ -240,13 +202,15 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                container[0] = 0;
        close(fd);
        count = 5;
-       while (((fd = ((devnm[0] == '/')
+       while (((fd = ((devname[0] == '/')
                       ?open(devname, O_RDONLY|O_EXCL)
-                      :open_dev_flags(devnm, O_RDONLY|O_EXCL))) < 0
-               || strcmp(fd2devnm(fd), devnm) != 0)
-              && container[0]
-              && mdmon_running(container)
-              && count) {
+                      :open_dev_flags(devnm, O_RDONLY|O_EXCL))) < 0 ||
+               strcmp(fd2devnm(fd), devnm) != 0) && container[0] &&
+              mdmon_running(container) && count) {
+               /* Can't open, so something might be wrong.  However it
+                * is a container, so we might be racing with mdmon, so
+                * retry for a bit.
+                */
                if (fd >= 0)
                        close(fd);
                flush_mdmon(container);
@@ -256,13 +220,13 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                if (fd >= 0)
                        close(fd);
                if (verbose >= 0)
-                       pr_err("Cannot get exclusive access to %s:"
-                              "Perhaps a running "
-                              "process, mounted filesystem "
-                              "or active volume group?\n",
+                       pr_err("Cannot get exclusive access to %s:Perhaps a running process, mounted filesystem or active volume group?\n",
                               devname);
                return 1;
        }
+       /* If this is an mdmon managed array, just write 'inactive'
+        * to the array state and let mdmon clear up.
+        */
        if (mdi &&
            mdi->array.level > 0 &&
            is_subarray(mdi->text_version)) {
@@ -270,7 +234,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                /* This is mdmon managed. */
                close(fd);
 
-               /* As we have an O_EXCL open, any use of the device
+               /* As we had an O_EXCL open, any use of the device
                 * which blocks STOP_ARRAY is probably a transient use,
                 * so it is reasonable to retry for a while - 5 seconds.
                 */
@@ -278,9 +242,9 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                while (count &&
                       (err = sysfs_set_str(mdi, NULL,
                                            "array_state",
-                                           "inactive")) < 0
-                      && errno == EBUSY) {
-                       usleep(200000);
+                                           "inactive")) < 0 &&
+                      errno == EBUSY) {
+                       sleep_for(0, MSEC_TO_NSEC(200), true);
                        count--;
                }
                if (err) {
@@ -297,8 +261,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                fd = open_dev_excl(devnm);
                if (fd < 0) {
                        if (verbose >= 0)
-                               pr_err("failed to completely stop %s"
-                                      ": Device is busy\n",
+                               pr_err("failed to completely stop %s: Device is busy\n",
                                       devname);
                        rv = 1;
                        goto out;
@@ -324,9 +287,8 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                            metadata_container_matches(m->metadata_version+9,
                                                       devnm)) {
                                if (verbose >= 0)
-                                       pr_err("Cannot stop container %s: "
-                                              "member %s still active\n",
-                                              devname, m->dev);
+                                       pr_err("Cannot stop container %s: member %s still active\n",
+                                              devname, m->devnm);
                                free_mdstat(mds);
                                rv = 1;
                                goto out;
@@ -345,14 +307,12 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
         *  - unfreeze reshape
         *  - wait on 'sync_completed' for that point to be reached.
         */
-       if (mdi && (mdi->array.level >= 4 && mdi->array.level <= 6) &&
+       if (mdi && is_level456(mdi->array.level) &&
            sysfs_attribute_available(mdi, NULL, "sync_action") &&
            sysfs_attribute_available(mdi, NULL, "reshape_direction") &&
            sysfs_get_str(mdi, NULL, "sync_action", buf, 20) > 0 &&
            strcmp(buf, "reshape\n") == 0 &&
-           sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2 &&
-           sysfs_set_str(mdi, NULL, "sync_action", "frozen") == 0) {
-               /* Array is frozen */
+           sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2) {
                unsigned long long position, curr;
                unsigned long long chunk1, chunk2;
                unsigned long long rddiv, chunkdiv;
@@ -363,12 +323,28 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                int delay;
                int scfd;
 
+               delay = 40;
+               while (rd1 > rd2 && delay > 0 &&
+                      sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) == 0) {
+                       /* must be in the critical section - wait a bit */
+                       delay -= 1;
+                       sleep_for(0, MSEC_TO_NSEC(100), true);
+               }
+
+               if (sysfs_set_str(mdi, NULL, "sync_action", "frozen") != 0)
+                       goto done;
+               /* Array is frozen */
+
                rd1 -= mdi->array.level == 6 ? 2 : 1;
                rd2 -= mdi->array.level == 6 ? 2 : 1;
                sysfs_get_str(mdi, NULL, "reshape_direction", buf, sizeof(buf));
                if (strncmp(buf, "back", 4) == 0)
                        backwards = 1;
-               sysfs_get_ll(mdi, NULL, "reshape_position", &position);
+               if (sysfs_get_ll(mdi, NULL, "reshape_position", &position) != 0) {
+                       /* reshape must have finished now */
+                       sysfs_set_str(mdi, NULL, "sync_action", "idle");
+                       goto done;
+               }
                sysfs_get_two(mdi, NULL, "chunk_size", &chunk1, &chunk2);
                chunk1 /= 512;
                chunk2 /= 512;
@@ -385,9 +361,20 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                        size &= ~(chunk1-1);
                        size &= ~(chunk2-1);
                        /* rd1 must be smaller */
+                       /* Reshape may have progressed further backwards than
+                        * recorded, so target even further back (hence "-1")
+                        */
                        position = (position / sectors - 1) * sectors;
+                       /* rd1 is always the conversion factor between 'sync'
+                        * position and 'reshape' position.
+                        * We read 1 "new" stripe worth of data from where-ever,
+                        * and when write out that full stripe.
+                        */
                        sync_max = size - position/rd1;
                } else {
+                       /* Reshape will very likely be beyond position, and it may
+                        * be too late to stop at '+1', so aim for '+2'
+                        */
                        position = (position / sectors + 2) * sectors;
                        sync_max = position/rd1;
                }
@@ -410,6 +397,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                delay = 3000;
                scfd = sysfs_open(mdi->sys_name, NULL, "sync_completed");
                while (scfd >= 0 && delay > 0 && old_sync_max > 0) {
+                       unsigned long long max_completed;
                        sysfs_get_ll(mdi, NULL, "reshape_position", &curr);
                        sysfs_fd_get_str(scfd, buf, sizeof(buf));
                        if (strncmp(buf, "none", 4) == 0) {
@@ -417,13 +405,16 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                                 * quite started yet.  Wait a bit and
                                 * check  'sync_action' to see.
                                 */
-                               usleep(10000);
+                               sleep_for(0, MSEC_TO_NSEC(10), true);
                                sysfs_get_str(mdi, NULL, "sync_action", buf, sizeof(buf));
                                if (strncmp(buf, "reshape", 7) != 0)
                                        break;
                        }
 
-                       if (sysfs_fd_get_ll(scfd, &completed) == 0 &&
+                       if (sysfs_fd_get_two(scfd, &completed,
+                                            &max_completed) == 2 &&
+                           /* 'completed' sometimes reads as max-uulong */
+                           completed < max_completed &&
                            (completed > sync_max ||
                             (completed == sync_max && curr != position))) {
                                while (completed > sync_max) {
@@ -447,16 +438,16 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                        close(scfd);
 
        }
+done:
 
        /* As we have an O_EXCL open, any use of the device
         * which blocks STOP_ARRAY is probably a transient use,
         * so it is reasonable to retry for a while - 5 seconds.
         */
        count = 25; err = 0;
-       while (count && fd >= 0
-              && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
-              && errno == EBUSY) {
-               usleep(200000);
+       while (count && fd >= 0 &&
+              (err = ioctl(fd, STOP_ARRAY, NULL)) < 0 && errno == EBUSY) {
+               sleep_for(0, MSEC_TO_NSEC(200), true);
                count --;
        }
        if (fd >= 0 && err) {
@@ -464,21 +455,11 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                        pr_err("failed to stop array %s: %s\n",
                               devname, strerror(errno));
                        if (errno == EBUSY)
-                               cont_err("Perhaps a running "
-                                        "process, mounted filesystem "
-                                        "or active volume group?\n");
+                               cont_err("Perhaps a running process, mounted filesystem or active volume group?\n");
                }
                rv = 1;
                goto out;
        }
-       /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
-        * was stopped, so We'll do it here just to be sure.  Drop any
-        * partitions as well...
-        */
-       if (fd >= 0)
-               ioctl(fd, BLKRRPART, 0);
-       if (mdi)
-               sysfs_uevent(mdi, "change");
 
        if (devnm[0] && use_udev()) {
                struct map_ent *mp = map_by_devnm(&map, devnm);
@@ -491,8 +472,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
        map_remove(&map, devnm);
        map_unlock(&map);
 out:
-       if (mdi)
-               sysfs_free(mdi);
+       sysfs_free(mdi);
 
        return rv;
 }
@@ -516,14 +496,14 @@ static void add_faulty(struct mddev_dev *dv, int fd, char disp)
        int remaining_disks;
        int i;
 
-       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
+       if (md_get_array_info(fd, &array) != 0)
                return;
 
        remaining_disks = array.nr_disks;
        for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
                char buf[40];
                disk.number = i;
-               if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+               if (md_get_disk_info(fd, &disk) != 0)
                        continue;
                if (disk.major == 0 && disk.minor == 0)
                        continue;
@@ -542,7 +522,7 @@ static void add_detached(struct mddev_dev *dv, int fd, char disp)
        int remaining_disks;
        int i;
 
-       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
+       if (md_get_array_info(fd, &array) != 0)
                return;
 
        remaining_disks = array.nr_disks;
@@ -550,7 +530,7 @@ static void add_detached(struct mddev_dev *dv, int fd, char disp)
                char buf[40];
                int sfd;
                disk.number = i;
-               if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+               if (md_get_disk_info(fd, &disk) != 0)
                        continue;
                if (disk.major == 0 && disk.minor == 0)
                        continue;
@@ -579,7 +559,7 @@ static void add_set(struct mddev_dev *dv, int fd, char set_char)
        int copies, set;
        int i;
 
-       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
+       if (md_get_array_info(fd, &array) != 0)
                return;
        if (array.level != 10)
                return;
@@ -592,7 +572,7 @@ static void add_set(struct mddev_dev *dv, int fd, char set_char)
        for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
                char buf[40];
                disk.number = i;
-               if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+               if (md_get_disk_info(fd, &disk) != 0)
                        continue;
                if (disk.major == 0 && disk.minor == 0)
                        continue;
@@ -607,9 +587,8 @@ static void add_set(struct mddev_dev *dv, int fd, char set_char)
 
 int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
                   struct supertype *dev_st, struct supertype *tst,
-                  unsigned long rdev,
-                  char *update, char *devname, int verbose,
-                  mdu_array_info_t *array)
+                  unsigned long rdev, enum update_opt update,
+                  char *devname, int verbose, mdu_array_info_t *array)
 {
        struct mdinfo mdi;
        int duuid[4];
@@ -631,43 +610,57 @@ int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
                 * though.
                 */
                mdu_disk_info_t disc;
-               /* re-add doesn't work for version-1 superblocks
-                * before 2.6.18 :-(
-                */
-               if (array->major_version == 1 &&
-                   get_linux_version() <= 2006018)
-                       goto skip_re_add;
                disc.number = mdi.disk.number;
-               if (ioctl(fd, GET_DISK_INFO, &disc) != 0
-                   || disc.major != 0 || disc.minor != 0
-                       )
+               if (md_get_disk_info(fd, &disc) != 0 ||
+                   disc.major != 0 || disc.minor != 0)
                        goto skip_re_add;
                disc.major = major(rdev);
                disc.minor = minor(rdev);
                disc.number = mdi.disk.number;
                disc.raid_disk = mdi.disk.raid_disk;
                disc.state = mdi.disk.state;
-               if (dv->writemostly == 1)
+               if (array->state & (1 << MD_SB_CLUSTERED)) {
+                       /* extra flags are needed when adding to a cluster as
+                        * there are two cases to distinguish
+                        */
+                       if (dv->disposition == 'c')
+                               disc.state |= (1 << MD_DISK_CANDIDATE);
+                       else
+                               disc.state |= (1 << MD_DISK_CLUSTER_ADD);
+               }
+               if (dv->writemostly == FlagSet)
                        disc.state |= 1 << MD_DISK_WRITEMOSTLY;
-               if (dv->writemostly == 2)
+               if (dv->writemostly == FlagClear)
                        disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
+               if (dv->failfast == FlagSet)
+                       disc.state |= 1 << MD_DISK_FAILFAST;
+               if (dv->failfast == FlagClear)
+                       disc.state &= ~(1 << MD_DISK_FAILFAST);
                remove_partitions(tfd);
-               if (update || dv->writemostly > 0) {
+               if (update || dv->writemostly != FlagDefault ||
+                   dv->failfast != FlagDefault) {
                        int rv = -1;
                        tfd = dev_open(dv->devname, O_RDWR);
                        if (tfd < 0) {
-                               pr_err("failed to open %s for"
-                                      " superblock update during re-add\n", dv->devname);
+                               pr_err("failed to open %s for superblock update during re-add\n", dv->devname);
                                return -1;
                        }
 
-                       if (dv->writemostly == 1)
+                       if (dv->writemostly == FlagSet)
+                               rv = dev_st->ss->update_super(
+                                       dev_st, NULL, UOPT_SPEC_WRITEMOSTLY,
+                                       devname, verbose, 0, NULL);
+                       if (dv->writemostly == FlagClear)
+                               rv = dev_st->ss->update_super(
+                                       dev_st, NULL, UOPT_SPEC_READWRITE,
+                                       devname, verbose, 0, NULL);
+                       if (dv->failfast == FlagSet)
                                rv = dev_st->ss->update_super(
-                                       dev_st, NULL, "writemostly",
+                                       dev_st, NULL, UOPT_SPEC_FAILFAST,
                                        devname, verbose, 0, NULL);
-                       if (dv->writemostly == 2)
+                       if (dv->failfast == FlagClear)
                                rv = dev_st->ss->update_super(
-                                       dev_st, NULL, "readwrite",
+                                       dev_st, NULL, UOPT_SPEC_NOFAILFAST,
                                        devname, verbose, 0, NULL);
                        if (update)
                                rv = dev_st->ss->update_super(
@@ -677,8 +670,7 @@ int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
                                rv = dev_st->ss->store_super(dev_st, tfd);
                        close(tfd);
                        if (rv != 0) {
-                               pr_err("failed to update"
-                                      " superblock during re-add\n");
+                               pr_err("failed to update superblock during re-add\n");
                                return -1;
                        }
                }
@@ -704,10 +696,11 @@ skip_re_add:
 int Manage_add(int fd, int tfd, struct mddev_dev *dv,
               struct supertype *tst, mdu_array_info_t *array,
               int force, int verbose, char *devname,
-              char *update, unsigned long rdev, unsigned long long array_size)
+              enum update_opt update, unsigned long rdev,
+              unsigned long long array_size, int raid_slot)
 {
        unsigned long long ldsize;
-       struct supertype *dev_st = NULL;
+       struct supertype *dev_st;
        int j;
        mdu_disk_info_t disc;
 
@@ -718,38 +711,18 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                        return -1;
        }
 
-       if (tst->ss->validate_geometry(
-                   tst, array->level, array->layout,
-                   array->raid_disks, NULL,
-                   ldsize >> 9, INVALID_SECTORS, NULL, NULL, 0) == 0) {
+       if (tst->ss == &super0 && ldsize > 4ULL*1024*1024*1024*1024) {
+               /* More than 4TB is wasted on v0.90 */
                if (!force) {
-                       pr_err("%s is larger than %s can "
-                              "effectively use.\n"
-                              "       Add --force is you "
-                              "really want to add this device.\n",
+                       pr_err("%s is larger than %s can effectively use.\n"
+                              "       Add --force is you really want to add this device.\n",
                               dv->devname, devname);
                        return -1;
                }
-               pr_err("%s is larger than %s can "
-                      "effectively use.\n"
-                      "       Adding anyway as --force "
-                      "was given.\n",
+               pr_err("%s is larger than %s can effectively use.\n"
+                      "       Adding anyway as --force was given.\n",
                       dv->devname, devname);
        }
-       if (!tst->ss->external &&
-           array->major_version == 0 &&
-           md_get_version(fd)%100 < 2) {
-               if (ioctl(fd, HOT_ADD_DISK, rdev)==0) {
-                       if (verbose >= 0)
-                               pr_err("hot added %s\n",
-                                      dv->devname);
-                       return 1;
-               }
-
-               pr_err("hot add failed for %s: %s\n",
-                      dv->devname, strerror(errno));
-               return -1;
-       }
 
        if (array->not_persistent == 0 || tst->ss->external) {
 
@@ -767,7 +740,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                                char *dev;
                                int dfd;
                                disc.number = j;
-                               if (ioctl(fd, GET_DISK_INFO, &disc))
+                               if (md_get_disk_info(fd, &disc))
                                        continue;
                                if (disc.major==0 && disc.minor==0)
                                        continue;
@@ -789,7 +762,8 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                                break;
                        }
                /* FIXME this is a bad test to be using */
-               if (!tst->sb && dv->disposition != 'a') {
+               if (!tst->sb && (dv->disposition != 'a' &&
+                                dv->disposition != 'S')) {
                        /* we are re-adding a device to a
                         * completely dead array - have to depend
                         * on kernel to check
@@ -800,7 +774,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                }
 
                /* Make sure device is large enough */
-               if (tst->ss->avail_size(tst, ldsize/512, INVALID_SECTORS) <
+               if (dv->disposition != 'j' &&  /* skip size check for Journal */
+                   tst->sb &&
+                   tst->ss->avail_size(tst, ldsize/512, INVALID_SECTORS) <
                    array_size) {
                        if (dv->disposition == 'M')
                                return 0;
@@ -815,20 +791,19 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                 * simply re-add it.
                 */
 
-               if (array->not_persistent==0) {
+               if (array->not_persistent == 0) {
                        dev_st = dup_super(tst);
                        dev_st->ss->load_super(dev_st, tfd, NULL);
-               }
-               if (dev_st && dev_st->sb) {
-                       int rv = attempt_re_add(fd, tfd, dv,
-                                               dev_st, tst,
-                                               rdev,
-                                               update, devname,
-                                               verbose,
-                                               array);
-                       dev_st->ss->free_super(dev_st);
-                       if (rv)
-                               return rv;
+                       if (dev_st->sb && dv->disposition != 'S') {
+                               int rv;
+
+                               rv = attempt_re_add(fd, tfd, dv, dev_st, tst,
+                                                   rdev, update, devname,
+                                                   verbose, array);
+                               dev_st->ss->free_super(dev_st);
+                               if (rv)
+                                       return rv;
+                       }
                }
                if (dv->disposition == 'M') {
                        if (verbose > 0)
@@ -846,9 +821,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                        int d;
                        int found = 0;
 
-                       for (d = 0; d < MAX_DISKS && found < array->active_disks; d++) {
+                       for (d = 0; d < MAX_DISKS && found < array->nr_disks; d++) {
                                disc.number = d;
-                               if (ioctl(fd, GET_DISK_INFO, &disc))
+                               if (md_get_disk_info(fd, &disc))
                                        continue;
                                if (disc.major == 0 && disc.minor == 0)
                                        continue;
@@ -859,6 +834,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                        }
                        array_failed = !enough(array->level, array->raid_disks,
                                               array->layout, 1, avail);
+                       free(avail);
                } else
                        array_failed = 0;
                if (array_failed) {
@@ -888,7 +864,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
         */
        for (j = array->raid_disks; j < tst->max_devs; j++) {
                disc.number = j;
-               if (ioctl(fd, GET_DISK_INFO, &disc))
+               if (md_get_disk_info(fd, &disc))
                        break;
                if (disc.major==0 && disc.minor==0)
                        break;
@@ -897,12 +873,41 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
        }
        disc.major = major(rdev);
        disc.minor = minor(rdev);
-       disc.number =j;
+       if (raid_slot < 0)
+               disc.number = j;
+       else
+               disc.number = raid_slot;
        disc.state = 0;
+
+       /* only add journal to array that supports journaling */
+       if (dv->disposition == 'j') {
+               struct mdinfo *mdp;
+
+               mdp = sysfs_read(fd, NULL, GET_ARRAY_STATE);
+               if (!mdp) {
+                       pr_err("%s unable to read array state.\n", devname);
+                       return -1;
+               }
+
+               if (mdp->array_state != ARRAY_READONLY) {
+                       sysfs_free(mdp);
+                       pr_err("%s is not readonly, cannot add journal.\n", devname);
+                       return -1;
+               }
+
+               sysfs_free(mdp);
+
+               disc.raid_disk = 0;
+       }
+
        if (array->not_persistent==0) {
                int dfd;
-               if (dv->writemostly == 1)
+               if (dv->disposition == 'j')
+                       disc.state |= (1 << MD_DISK_JOURNAL) | (1 << MD_DISK_SYNC);
+               if (dv->writemostly == FlagSet)
                        disc.state |= 1 << MD_DISK_WRITEMOSTLY;
+               if (dv->failfast == FlagSet)
+                       disc.state |= 1 << MD_DISK_FAILFAST;
                dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
                if (tst->ss->add_to_super(tst, &disc, dfd,
                                          dv->devname, INVALID_SECTORS))
@@ -918,7 +923,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                for (j = 0; j < tst->max_devs; j++) {
                        mdu_disk_info_t disc2;
                        disc2.number = j;
-                       if (ioctl(fd, GET_DISK_INFO, &disc2))
+                       if (md_get_disk_info(fd, &disc2))
                                continue;
                        if (disc2.major==0 && disc2.minor==0)
                                continue;
@@ -938,8 +943,18 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                        }
                free(used);
        }
-       if (dv->writemostly == 1)
+
+       if (array->state & (1 << MD_SB_CLUSTERED)) {
+               if (dv->disposition == 'c')
+                       disc.state |= (1 << MD_DISK_CANDIDATE);
+               else
+                       disc.state |= (1 << MD_DISK_CLUSTER_ADD);
+       }
+
+       if (dv->writemostly == FlagSet)
                disc.state |= (1 << MD_DISK_WRITEMOSTLY);
+       if (dv->failfast == FlagSet)
+               disc.state |= (1 << MD_DISK_FAILFAST);
        if (tst->ss->external) {
                /* add a disk
                 * to an external metadata container */
@@ -953,26 +968,28 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 
                container_fd = open_dev_excl(devnm);
                if (container_fd < 0) {
-                       pr_err("add failed for %s:"
-                              " could not get exclusive access to container\n",
+                       pr_err("add failed for %s: could not get exclusive access to container\n",
                               dv->devname);
                        tst->ss->free_super(tst);
                        return -1;
                }
 
+               /* Check if metadata handler is able to accept the drive */
+               if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL,
+                   0, 0, dv->devname, NULL, 0, 1)) {
+                       close(container_fd);
+                       return -1;
+               }
+
                Kill(dv->devname, NULL, 0, -1, 0);
                dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
-               if (mdmon_running(tst->container_devnm))
-                       tst->update_tail = &tst->updates;
                if (tst->ss->add_to_super(tst, &disc, dfd,
                                          dv->devname, INVALID_SECTORS)) {
                        close(dfd);
                        close(container_fd);
                        return -1;
                }
-               if (tst->update_tail)
-                       flush_metadata_updates(tst);
-               else
+               if (!mdmon_running(tst->container_devnm))
                        tst->ss->sync_metadata(tst);
 
                sra = sysfs_read(container_fd, NULL, 0);
@@ -993,8 +1010,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                 * would block add_disk */
                tst->ss->free_super(tst);
                if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
-                       pr_err("add new device to external metadata"
-                              " failed for %s\n", dv->devname);
+                       pr_err("add new device to external metadata failed for %s\n", dv->devname);
                        close(container_fd);
                        sysfs_free(sra);
                        return -1;
@@ -1005,10 +1021,20 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
        } else {
                tst->ss->free_super(tst);
                if (ioctl(fd, ADD_NEW_DISK, &disc)) {
-                       pr_err("add new device failed for %s as %d: %s\n",
-                              dv->devname, j, strerror(errno));
+                       if (dv->disposition == 'j')
+                               pr_err("Failed to hot add %s as journal, "
+                                      "please try restart %s.\n", dv->devname, devname);
+                       else
+                               pr_err("add new device failed for %s as %d: %s\n",
+                                      dv->devname, j, strerror(errno));
                        return -1;
                }
+               if (dv->disposition == 'j') {
+                       pr_err("Journal added successfully, making %s read-write\n", devname);
+                       if (Manage_ro(devname, fd, -1))
+                               pr_err("Failed to make %s read-write\n", devname);
+               }
+
        }
        if (verbose >= 0)
                pr_err("added %s\n", dv->devname);
@@ -1016,7 +1042,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 }
 
 int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
-                 int sysfd, unsigned long rdev, int verbose, char *devname)
+                 int sysfd, unsigned long rdev, int force, int verbose, char *devname)
 {
        int lfd = -1;
        int err;
@@ -1036,8 +1062,7 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
                strcpy(devnm, fd2devnm(fd));
                lfd = open_dev_excl(devnm);
                if (lfd < 0) {
-                       pr_err("Cannot get exclusive access "
-                              " to container - odd\n");
+                       pr_err("Cannot get exclusive access  to container - odd\n");
                        return -1;
                }
                /* We may not be able to check on holders in
@@ -1049,19 +1074,34 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
                 */
                if (rdev == 0)
                        ret = -1;
-               else
-                       ret = sysfs_unique_holder(devnm, rdev);
-               if (ret == 0) {
-                       pr_err("%s is not a member, cannot remove.\n",
-                              dv->devname);
-                       close(lfd);
-                       return -1;
-               }
-               if (ret >= 2) {
-                       pr_err("%s is still in use, cannot remove.\n",
-                              dv->devname);
-                       close(lfd);
-                       return -1;
+               else {
+                       /*
+                        * The drive has already been set to 'faulty', however
+                        * monitor might not have had time to process it and the
+                        * drive might still have an entry in the 'holders'
+                        * directory. Try a few times to avoid a false error
+                        */
+                       int count = 20;
+
+                       do {
+                               ret = sysfs_unique_holder(devnm, rdev);
+                               if (ret < 2)
+                                       break;
+                               sleep_for(0, MSEC_TO_NSEC(100), true);
+                       } while (--count > 0);
+
+                       if (ret == 0) {
+                               pr_err("%s is not a member, cannot remove.\n",
+                                       dv->devname);
+                               close(lfd);
+                               return -1;
+                       }
+                       if (ret >= 2) {
+                               pr_err("%s is still in use, cannot remove.\n",
+                                       dv->devname);
+                               close(lfd);
+                               return -1;
+                       }
                }
        }
        /* FIXME check that it is a current member */
@@ -1069,13 +1109,9 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
                /* device has been removed and we don't know
                 * the major:minor number
                 */
-               int n = write(sysfd, "remove", 6);
-               if (n != 6)
-                       err = -1;
-               else
-                       err = 0;
+               err = sys_hot_remove_disk(sysfd, force);
        } else {
-               err = ioctl(fd, HOT_REMOVE_DISK, rdev);
+               err = hot_remove_disk(fd, rdev, force);
                if (err && errno == ENODEV) {
                        /* Old kernels rejected this if no personality
                         * is registered */
@@ -1092,13 +1128,11 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
                                                    "state", "remove");
                        else
                                err = -1;
-                       if (sra)
-                               sysfs_free(sra);
+                       sysfs_free(sra);
                }
        }
        if (err) {
-               pr_err("hot remove failed "
-                      "for %s: %s\n",  dv->devname,
+               pr_err("hot remove failed for %s: %s\n",        dv->devname,
                       strerror(errno));
                if (lfd >= 0)
                        close(lfd);
@@ -1233,57 +1267,120 @@ int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
        return -1;
 }
 
+/**
+ * is_remove_safe() - Check if remove is safe.
+ * @array: Array info.
+ * @fd: Array file descriptor.
+ * @devname: Name of device to remove.
+ * @verbose: Verbose.
+ *
+ * The function determines if array will be operational
+ * after removing &devname.
+ *
+ * Every member device that holds a raid slot and is in sync, other than
+ * &devname itself, is marked as available; the resulting map is then
+ * handed to enough() together with the array level and layout.
+ *
+ * Return: True if array will be operational, false otherwise
+ * (including when the sysfs attributes cannot be read).
+ */
+bool is_remove_safe(mdu_array_info_t *array, const int fd, char *devname, const int verbose)
+{
+       /* NOTE(review): "+ 5" presumably skips a leading "/dev/" - confirm
+        * that callers never pass a shorter (kernel-internal) name.
+        */
+       dev_t devid = devnm2devid(devname + 5);
+       struct mdinfo *mdi = sysfs_read(fd, NULL, GET_DEVS | GET_DISKS | GET_STATE);
+       struct mdinfo *disk;
+
+       if (!mdi) {
+               if (verbose)
+                       pr_err("Failed to read sysfs attributes for %s\n", devname);
+               return false;
+       }
+
+       char *avail = xcalloc(array->raid_disks, sizeof(char));
+
+       /* Walk the device list with a separate cursor: mdi must keep
+        * pointing at the head so that sysfs_free() below releases it.
+        */
+       for (disk = mdi->devs; disk; disk = disk->next) {
+               /* avail[] has raid_disks entries; ignore slots outside it */
+               if (disk->disk.raid_disk < 0 ||
+                   disk->disk.raid_disk >= array->raid_disks)
+                       continue;
+               if (!(disk->disk.state & (1 << MD_DISK_SYNC)))
+                       continue;
+               /* the device being removed does not count as available */
+               if (makedev(disk->disk.major, disk->disk.minor) == devid)
+                       continue;
+               avail[disk->disk.raid_disk] = 1;
+       }
+       sysfs_free(mdi);
+
+       bool is_enough = enough(array->level, array->raid_disks,
+                               array->layout, 1, avail);
+
+       free(avail);
+       return is_enough;
+}
+
+
+/**
+ * Manage_subdevs() - Execute operation depending on devmode.
+ *
+ * @devname: name of the device.
+ * @fd: file descriptor.
+ * @devlist: list of sub-devices to manage.
+ * @verbose: verbose level.
+ * @test: test flag.
+ * @update: type of update.
+ * @force: force flag.
+ *
+ * This function executes operation defined by devmode
+ * for each dev from devlist.
+ * Devmode can be:
+ * 'a' - add the device
+ * 'S' - add the device as a spare - don't try re-add
+ * 'j' - add the device as a journal device
+ * 'A' - re-add the device
+ * 'r' - remove the device: HOT_REMOVE_DISK
+ *       device can be 'faulty' or 'detached' in which case all
+ *       matching devices are removed.
+ * 'f' - set the device faulty SET_DISK_FAULTY
+ *       device can be 'detached' in which case any device that
+ *       is inaccessible will be marked faulty.
+ * 'I' - remove device by using incremental fail
+ *       which is executed when the device is removed unexpectedly.
+ * 'R' - mark this device as wanting replacement.
+ * 'W' - this device is added if necessary and activated as
+ *       a replacement for a previous 'R' device.
+ * -----
+ * 'w' - 'W' will be changed to 'w' when it is paired with
+ *       a 'R' device.  If a 'W' is found while walking the list
+ *       it must be unpaired, and is an error.
+ * 'M' - this is created by a 'missing' target.  It is a slight
+ *       variant on 'A'
+ * 'F' - Another variant of 'A', where the device was faulty
+ *       so must be removed from the array first.
+ * 'c' - confirm the device as found (for clustered environments)
+ *
+ * For 'f' and 'r', the device can also be a kernel-internal
+ * name such as 'sdb'.
+ *
+ * Return: 0 on success, otherwise 1 or 2.
+ */
 int Manage_subdevs(char *devname, int fd,
                   struct mddev_dev *devlist, int verbose, int test,
-                  char *update, int force)
+                  enum update_opt update, int force)
 {
-       /* Do something to each dev.
-        * devmode can be
-        *  'a' - add the device
-        *         try HOT_ADD_DISK
-        *         If that fails EINVAL, try ADD_NEW_DISK
-        *  'A' - re-add the device
-        *  'r' - remove the device: HOT_REMOVE_DISK
-        *        device can be 'faulty' or 'detached' in which case all
-        *        matching devices are removed.
-        *  'f' - set the device faulty SET_DISK_FAULTY
-        *        device can be 'detached' in which case any device that
-        *        is inaccessible will be marked faulty.
-        *  'R' - mark this device as wanting replacement.
-        *  'W' - this device is added if necessary and activated as
-        *        a replacement for a previous 'R' device.
-        * -----
-        *  'w' - 'W' will be changed to 'w' when it is paired with
-        *        a 'R' device.  If a 'W' is found while walking the list
-        *        it must be unpaired, and is an error.
-        *  'M' - this is created by a 'missing' target.  It is a slight
-        *        variant on 'A'
-        *  'F' - Another variant of 'A', where the device was faulty
-        *        so must be removed from the array first.
-        *
-        * For 'f' and 'r', the device can also be a kernel-internal
-        * name such as 'sdb'.
-        */
        mdu_array_info_t array;
        unsigned long long array_size;
        struct mddev_dev *dv;
-       struct stat stb;
        int tfd = -1;
        struct supertype *tst;
        char *subarray = NULL;
        int sysfd = -1;
        int count = 0; /* number of actions taken */
        struct mdinfo info;
+       struct mdinfo devinfo;
        int frozen = 0;
        int busy = 0;
+       int raid_slot = -1;
 
-       if (ioctl(fd, GET_ARRAY_INFO, &array)) {
-               pr_err("Cannot get array info for %s\n",
-                       devname);
+       if (sysfs_init(&info, fd, NULL)) {
+               pr_err("sysfs not availabile for %s\n", devname);
                goto abort;
        }
-       sysfs_init(&info, fd, NULL);
 
+       if (md_get_array_info(fd, &array)) {
+               pr_err("Cannot get array info for %s\n", devname);
+               goto abort;
+       }
        /* array.size is only 32 bits and may be truncated.
         * So read from sysfs if possible, and record number of sectors
         */
@@ -1299,16 +1396,26 @@ int Manage_subdevs(char *devname, int fd,
                goto abort;
        }
 
-       stb.st_rdev = 0;
        for (dv = devlist; dv; dv = dv->next) {
+               dev_t rdev = 0; /* device to add/remove etc */
                int rv;
+               int mj,mn;
+
+               raid_slot = -1;
+               if (dv->disposition == 'c') {
+                       rv = parse_cluster_confirm_arg(dv->devname,
+                                                      &dv->devname,
+                                                      &raid_slot);
+                       if (rv) {
+                               pr_err("Could not get the devname of cluster\n");
+                               goto abort;
+                       }
+               }
 
                if (strcmp(dv->devname, "failed") == 0 ||
                    strcmp(dv->devname, "faulty") == 0) {
-                       if (dv->disposition != 'A'
-                           && dv->disposition != 'r') {
-                               pr_err("%s only meaningful "
-                                       "with -r or --re-add, not -%c\n",
+                       if (dv->disposition != 'A' && dv->disposition != 'r') {
+                               pr_err("%s only meaningful with -r or --re-add, not -%c\n",
                                        dv->devname, dv->disposition);
                                goto abort;
                        }
@@ -1318,8 +1425,7 @@ int Manage_subdevs(char *devname, int fd,
                }
                if (strcmp(dv->devname, "detached") == 0) {
                        if (dv->disposition != 'r' && dv->disposition != 'f') {
-                               pr_err("%s only meaningful "
-                                       "with -r of -f, not -%c\n",
+                               pr_err("%s only meaningful with -r of -f, not -%c\n",
                                        dv->devname, dv->disposition);
                                goto abort;
                        }
@@ -1328,16 +1434,20 @@ int Manage_subdevs(char *devname, int fd,
                }
 
                if (strcmp(dv->devname, "missing") == 0) {
-                       struct mddev_dev *add_devlist = NULL;
+                       struct mddev_dev *add_devlist;
                        struct mddev_dev **dp;
+                       if (dv->disposition == 'c') {
+                               rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
+                               break;
+                       }
+
                        if (dv->disposition != 'A') {
-                               pr_err("'missing' only meaningful "
-                                      "with --re-add\n");
+                               pr_err("'missing' only meaningful with --re-add\n");
                                goto abort;
                        }
                        add_devlist = conf_get_devs();
                        if (add_devlist == NULL) {
-                               pr_err("no devices to scan for missing members.");
+                               pr_err("no devices to scan for missing members.\n");
                                continue;
                        }
                        for (dp = &add_devlist; *dp; dp = & (*dp)->next)
@@ -1383,9 +1493,9 @@ int Manage_subdevs(char *devname, int fd,
                        /* Assume this is a kernel-internal name like 'sda1' */
                        int found = 0;
                        char dname[55];
-                       if (dv->disposition != 'r' && dv->disposition != 'f') {
-                               pr_err("%s only meaningful "
-                                       "with -r or -f, not -%c\n",
+                       if (dv->disposition != 'r' && dv->disposition != 'f' &&
+                           dv->disposition != 'I') {
+                               pr_err("%s only meaningful with -r, -f or -I, not -%c\n",
                                        dv->devname, dv->disposition);
                                goto abort;
                        }
@@ -1394,10 +1504,9 @@ int Manage_subdevs(char *devname, int fd,
                        sysfd = sysfs_open(fd2devnm(fd), dname, "block/dev");
                        if (sysfd >= 0) {
                                char dn[20];
-                               int mj,mn;
                                if (sysfs_fd_get_str(sysfd, dn, 20) > 0 &&
                                    sscanf(dn, "%d:%d", &mj,&mn) == 2) {
-                                       stb.st_rdev = makedev(mj,mn);
+                                       rdev = makedev(mj,mn);
                                        found = 1;
                                }
                                close(sysfd);
@@ -1406,29 +1515,29 @@ int Manage_subdevs(char *devname, int fd,
                        if (!found) {
                                sysfd = sysfs_open(fd2devnm(fd), dname, "state");
                                if (sysfd < 0) {
-                                       pr_err("%s does not appear "
-                                               "to be a component of %s\n",
+                                       pr_err("%s does not appear to be a component of %s\n",
                                                dv->devname, devname);
                                        goto abort;
                                }
                        }
+               } else if ((dv->disposition == 'r' ||
+                           dv->disposition == 'f') &&
+                          get_maj_min(dv->devname, &mj, &mn)) {
+                       /* for 'fail' and 'remove', the device might
+                        * not exist.
+                        */
+                       rdev = makedev(mj, mn);
                } else {
                        tfd = dev_open(dv->devname, O_RDONLY);
-                       if (tfd >= 0)
-                               fstat(tfd, &stb);
-                       else {
+                       if (tfd >= 0) {
+                               fstat_is_blkdev(tfd, dv->devname, &rdev);
+                               close(tfd);
+                       } else {
                                int open_err = errno;
-                               if (stat(dv->devname, &stb) != 0) {
-                                       pr_err("Cannot find %s: %s\n",
-                                              dv->devname, strerror(errno));
-                                       goto abort;
-                               }
-                               if ((stb.st_mode & S_IFMT) != S_IFBLK) {
+                               if (!stat_is_blkdev(dv->devname, &rdev)) {
                                        if (dv->disposition == 'M')
                                                /* non-fatal. Also improbable */
                                                continue;
-                                       pr_err("%s is not a block device.\n",
-                                              dv->devname);
                                        goto abort;
                                }
                                if (dv->disposition == 'r')
@@ -1452,20 +1561,32 @@ int Manage_subdevs(char *devname, int fd,
                                dv->devname, dv->disposition);
                        goto abort;
                case 'a':
+               case 'S': /* --add-spare */
+               case 'j': /* --add-journal */
                case 'A':
                case 'M': /* --re-add missing */
                case 'F': /* --re-add faulty  */
+               case 'c': /* --cluster-confirm */
                        /* add the device */
                        if (subarray) {
-                               pr_err("Cannot add disks to a"
-                                       " \'member\' array, perform this"
-                                       " operation on the parent container\n");
+                               pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
                                goto abort;
                        }
+
+                       /* Let's first try to write re-add to sysfs */
+                       if (rdev != 0 &&
+                           (dv->disposition == 'A' || dv->disposition == 'F')) {
+                               sysfs_init_dev(&devinfo, rdev);
+                               if (sysfs_set_str(&info, &devinfo, "state", "re-add") == 0) {
+                                       pr_err("re-add %s to %s succeed\n",
+                                               dv->devname, info.sys_name);
+                                       break;
+                               }
+                       }
+
                        if (dv->disposition == 'F')
                                /* Need to remove first */
-                               ioctl(fd, HOT_REMOVE_DISK,
-                                     (unsigned long)stb.st_rdev);
+                               hot_remove_disk(fd, rdev, force);
                        /* Make sure it isn't in use (in 2.6 or later) */
                        tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
                        if (tfd >= 0) {
@@ -1491,7 +1612,7 @@ int Manage_subdevs(char *devname, int fd,
                        }
                        rv = Manage_add(fd, tfd, dv, tst, &array,
                                        force, verbose, devname, update,
-                                       stb.st_rdev, array_size);
+                                       rdev, array_size, raid_slot);
                        close(tfd);
                        tfd = -1;
                        if (rv < 0)
@@ -1503,13 +1624,11 @@ int Manage_subdevs(char *devname, int fd,
                case 'r':
                        /* hot remove */
                        if (subarray) {
-                               pr_err("Cannot remove disks from a"
-                                       " \'member\' array, perform this"
-                                       " operation on the parent container\n");
+                               pr_err("Cannot remove disks from a \'member\' array, perform this operation on the parent container\n");
                                rv = -1;
                        } else
                                rv = Manage_remove(tst, fd, dv, sysfd,
-                                                  stb.st_rdev, verbose,
+                                                  rdev, verbose, force,
                                                   devname);
                        if (sysfd >= 0)
                                close(sysfd);
@@ -1521,10 +1640,17 @@ int Manage_subdevs(char *devname, int fd,
                        break;
 
                case 'f': /* set faulty */
-                       /* FIXME check current member */
+                       if (!is_remove_safe(&array, fd, dv->devname, verbose)) {
+                               pr_err("Cannot remove %s from %s, array will be failed.\n",
+                                      dv->devname, devname);
+                               if (sysfd >= 0)
+                                       close(sysfd);
+                               goto abort;
+                       }
+               case 'I': /* incremental fail */
                        if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) ||
                            (sysfd < 0 && ioctl(fd, SET_DISK_FAULTY,
-                                               (unsigned long) stb.st_rdev))) {
+                                               rdev))) {
                                if (errno == EBUSY)
                                        busy = 1;
                                pr_err("set device faulty failed for %s:  %s\n",
@@ -1543,9 +1669,7 @@ int Manage_subdevs(char *devname, int fd,
                        break;
                case 'R': /* Mark as replaceable */
                        if (subarray) {
-                               pr_err("Cannot replace disks in a"
-                                       " \'member\' array, perform this"
-                                       " operation on the parent container\n");
+                               pr_err("Cannot replace disks in a \'member\' array, perform this operation on the parent container\n");
                                rv = -1;
                        } else {
                                if (!frozen) {
@@ -1555,7 +1679,7 @@ int Manage_subdevs(char *devname, int fd,
                                                frozen = -1;
                                }
                                rv = Manage_replace(tst, fd, dv,
-                                                   stb.st_rdev, verbose,
+                                                   rdev, verbose,
                                                    devname);
                        }
                        if (rv < 0)
@@ -1569,7 +1693,7 @@ int Manage_subdevs(char *devname, int fd,
                        goto abort;
                case 'w': /* --with device which was matched */
                        rv = Manage_with(tst, fd, dv,
-                                        stb.st_rdev, verbose, devname);
+                                        rdev, verbose, devname);
                        if (rv < 0)
                                goto abort;
                        break;
@@ -1600,10 +1724,13 @@ int autodetect(void)
        return rv;
 }
 
-int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int verbose)
+int Update_subarray(char *dev, char *subarray, enum update_opt update,
+                   struct mddev_ident *ident, int verbose)
 {
        struct supertype supertype, *st = &supertype;
        int fd, rv = 2;
+       struct mdinfo *info = NULL;
+       char *update_verb = map_num(update_options, update);
 
        memset(st, 0, sizeof(*st));
 
@@ -1618,25 +1745,41 @@ int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident
                goto free_super;
        }
 
+       if (is_subarray_active(subarray, st->devnm)) {
+               if (verbose >= 0)
+                       pr_err("Subarray %s in %s is active, cannot update %s\n",
+                               subarray, dev, update_verb);
+               goto free_super;
+       }
+
        if (mdmon_running(st->devnm))
                st->update_tail = &st->updates;
 
+       info = st->ss->container_content(st, subarray);
+
+       if (update == UOPT_PPL && !is_level456(info->array.level)) {
+               pr_err("RWH policy ppl is supported only for raid4, raid5 and raid6.\n");
+               goto free_super;
+       }
+
        rv = st->ss->update_subarray(st, subarray, update, ident);
 
        if (rv) {
                if (verbose >= 0)
                        pr_err("Failed to update %s of subarray-%s in %s\n",
-                               update, subarray, dev);
+                               update_verb, subarray, dev);
        } else if (st->update_tail)
                flush_metadata_updates(st);
        else
                st->ss->sync_metadata(st);
 
-       if (rv == 0 && strcmp(update, "name") == 0 && verbose >= 0)
+       if (rv == 0 && update == UOPT_NAME && verbose >= 0)
                pr_err("Updated subarray-%s name from %s, UUIDs may have changed\n",
                       subarray, dev);
 
- free_super:
+free_super:
+       if (info)
+               free(info);
        st->ss->free_super(st);
        close(fd);
 
@@ -1656,21 +1799,25 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
        int fd2 = open(from_devname, O_RDONLY);
 
        if (fd1 < 0 || fd2 < 0) {
-               if (fd1>=0) close(fd1);
-               if (fd2>=0) close(fd2);
+               if (fd1 >= 0)
+                       close(fd1);
+               if (fd2 >= 0)
+                       close(fd2);
                return 0;
        }
 
        devlist.next = NULL;
        devlist.used = 0;
-       devlist.writemostly = 0;
+       devlist.writemostly = FlagDefault;
+       devlist.failfast = FlagDefault;
        devlist.devname = devname;
        sprintf(devname, "%d:%d", major(devid), minor(devid));
 
        devlist.disposition = 'r';
-       if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0) == 0) {
+       if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, UOPT_UNDEFINED, 0) == 0) {
                devlist.disposition = 'a';
-               if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL, 0) == 0) {
+               if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0,
+                                  UOPT_UNDEFINED, 0) == 0) {
                        /* make sure manager is aware of changes */
                        ping_manager(to_devname);
                        ping_manager(from_devname);
@@ -1678,10 +1825,11 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
                        close(fd2);
                        return 1;
                }
-               else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0);
+               else
+                       Manage_subdevs(from_devname, fd2, &devlist,
+                                      -1, 0, UOPT_UNDEFINED, 0);
        }
        close(fd1);
        close(fd2);
        return 0;
 }
-#endif