git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Manage.c
Allow data-offset to be specified per-device for create
[thirdparty/mdadm.git] / Manage.c
index a679c24ce571d6550a28fc09099056a4793e37f8..296feeb7f6f424adc31831212a91dfa323a6fa8f 100644 (file)
--- a/Manage.c
+++ b/Manage.c
@@ -1,7 +1,7 @@
 /*
  * mdadm - manage Linux "md" devices aka RAID arrays.
  *
- * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2012 Neil Brown <neilb@suse.de>
  *
  *
  *    This program is free software; you can redistribute it and/or modify
@@ -44,13 +44,14 @@ int Manage_ro(char *devname, int fd, int readonly)
 #ifndef MDASSEMBLE
        struct mdinfo *mdi;
 #endif
+       int rv = 0;
 
        if (md_get_version(fd) < 9000) {
-               fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
+               pr_err("need md driver version 0.90.0 or later\n");
                return 1;
        }
 #ifndef MDASSEMBLE
-       /* If this is an externally-manage array, we need to modify the
+       /* If this is an externally-managed array, we need to modify the
         * metadata_version so that mdmon doesn't undo our change.
         */
        mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
@@ -70,12 +71,13 @@ int Manage_ro(char *devname, int fd, int readonly)
                        rv = sysfs_set_str(mdi, NULL, "array_state", "readonly");
 
                        if (rv < 0) {
-                               fprintf(stderr, Name ": failed to set readonly for %s: %s\n",
+                               pr_err("failed to set readonly for %s: %s\n",
                                        devname, strerror(errno));
 
                                vers[9] = mdi->text_version[0];
                                sysfs_set_str(mdi, NULL, "metadata_version", vers);
-                               return 1;
+                               rv = 1;
+                               goto out;
                        }
                } else {
                        char *cp;
@@ -84,42 +86,50 @@ int Manage_ro(char *devname, int fd, int readonly)
                        sysfs_set_str(mdi, NULL, "metadata_version", vers);
 
                        cp = strchr(vers+10, '/');
-                       if (*cp)
+                       if (cp)
                                *cp = 0;
                        ping_monitor(vers+10);
                        if (mdi->array.level <= 0)
                                sysfs_set_str(mdi, NULL, "array_state", "active");
                }
-               return 0;
+               goto out;
        }
 #endif
        if (ioctl(fd, GET_ARRAY_INFO, &array)) {
-               fprintf(stderr, Name ": %s does not appear to be active.\n",
+               pr_err("%s does not appear to be active.\n",
                        devname);
-               return 1;
+               rv = 1;
+               goto out;
        }
 
-       if (readonly>0) {
+       if (readonly > 0) {
                if (ioctl(fd, STOP_ARRAY_RO, NULL)) {
-                       fprintf(stderr, Name ": failed to set readonly for %s: %s\n",
+                       pr_err("failed to set readonly for %s: %s\n",
                                devname, strerror(errno));
-                       return 1;
+                       rv = 1;
+                       goto out;
                }
        } else if (readonly < 0) {
                if (ioctl(fd, RESTART_ARRAY_RW, NULL)) {
-                       fprintf(stderr, Name ": failed to set writable for %s: %s\n",
+                       pr_err("failed to set writable for %s: %s\n",
                                devname, strerror(errno));
-                       return 1;
+                       rv = 1;
+                       goto out;
                }
        }
-       return 0;
+out:
+#ifndef MDASSEMBLE
+       if (mdi)
+               sysfs_free(mdi);
+#endif
+       return rv;
 }
 
 #ifndef MDASSEMBLE
 
 static void remove_devices(int devnum, char *path)
 {
-       /* 
+       /*
         * Remove names at 'path' - possibly with
         * partition suffixes - which link to the 'standard'
         * name for devnum.  These were probably created
@@ -142,10 +152,10 @@ static void remove_devices(int devnum, char *path)
                sprintf(base, "/dev/md_d%d", -1-devnum);
        be = base + strlen(base);
 
-       path2 = malloc(strlen(path)+20);
+       path2 = xmalloc(strlen(path)+20);
        strcpy(path2, path);
        pe = path2 + strlen(path2);
-       
+
        for (part = 0; part < 16; part++) {
                if (part) {
                        sprintf(be, "p%d", part);
@@ -156,53 +166,50 @@ static void remove_devices(int devnum, char *path)
                                sprintf(pe, "%d", part);
                }
                n = readlink(path2, link, sizeof(link));
-               if (n && (int)strlen(base) == n &&
+               if (n > 0 && (int)strlen(base) == n &&
                    strncmp(link, base, n) == 0)
                        unlink(path2);
        }
        free(path2);
 }
-       
 
-int Manage_runstop(char *devname, int fd, int runstop, int quiet)
+int Manage_runstop(char *devname, int fd, int runstop,
+                  int verbose, int will_retry)
 {
-       /* Run or stop the array. array must already be configured
-        * required >= 0.90.0
-        * Only print failure messages if quiet == 0;
-        * quiet > 0 means really be quiet
-        * quiet < 0 means we will try again if it fails.
+       /* Run or stop the array.  Array must already be configured
+        * 'Run' requires >= 0.90.0
+        * 'will_retry' is only relevant for 'stop', and means
+        * that error messages are not wanted.
         */
        mdu_param_t param; /* unused */
+       int rv = 0;
+
+       if (will_retry && verbose == 0)
+               verbose = -1;
 
        if (runstop == -1 && md_get_version(fd) < 9000) {
-               if (ioctl(fd, STOP_MD, 0)) {
-                       if (quiet == 0) fprintf(stderr,
-                                               Name ": stopping device %s "
-                                               "failed: %s\n",
-                                               devname, strerror(errno));
-                       return 1;
-               }
+               if (ioctl(fd, STOP_MD, 0) == 0)
+                       return 0;
+               pr_err("stopping device %s "
+                      "failed: %s\n",
+                      devname, strerror(errno));
+               return 1;
        }
 
        if (md_get_version(fd) < 9000) {
-               fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
-               return 1;
-       }
-       /*
-       if (ioctl(fd, GET_ARRAY_INFO, &array)) {
-               fprintf(stderr, Name ": %s does not appear to be active.\n",
-                       devname);
+               pr_err("need md driver version 0.90.0 or later\n");
                return 1;
        }
-       */
-       if (runstop>0) {
+
+       if (runstop > 0) {
                if (ioctl(fd, RUN_ARRAY, &param)) {
-                       fprintf(stderr, Name ": failed to run array %s: %s\n",
-                               devname, strerror(errno));
+                       if (verbose >= 0)
+                               pr_err("failed to run array %s: %s\n",
+                                      devname, strerror(errno));
                        return 1;
                }
-               if (quiet <= 0)
-                       fprintf(stderr, Name ": started %s\n", devname);
+               if (verbose >= 0)
+                       pr_err("started %s\n", devname);
        } else if (runstop < 0){
                struct map_ent *map = NULL;
                struct stat stb;
@@ -222,25 +229,41 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
                if (fd < 0 || fd2devnum(fd) != devnum) {
                        if (fd >= 0)
                                close(fd);
-                       fprintf(stderr,
-                               Name ": Cannot get exclusive access to %s:"
-                               " possibly it is still in use.\n",
-                               devname);
+                       if (verbose >= 0)
+                               pr_err("Cannot get exclusive access to %s:"
+                                      "Perhaps a running "
+                                      "process, mounted filesystem "
+                                      "or active volume group?\n",
+                                      devname);
                        return 1;
                }
                mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
                if (mdi &&
                    mdi->array.level > 0 &&
                    is_subarray(mdi->text_version)) {
+                       int err;
                        /* This is mdmon managed. */
                        close(fd);
-                       if (sysfs_set_str(mdi, NULL,
-                                         "array_state", "inactive") < 0) {
-                               if (quiet == 0)
-                                       fprintf(stderr, Name
-                                               ": failed to stop array %s: %s\n",
-                                               devname, strerror(errno));
-                               return 1;
+
+                       /* As we have an O_EXCL open, any use of the device
+                        * which blocks STOP_ARRAY is probably a transient use,
+                        * so it is reasonable to retry for a while - 5 seconds.
+                        */
+                       count = 25;
+                       while (count &&
+                              (err = sysfs_set_str(mdi, NULL,
+                                                   "array_state",
+                                                   "inactive")) < 0
+                              && errno == EBUSY) {
+                               usleep(200000);
+                               count--;
+                       }
+                       if (err) {
+                               if (verbose >= 0)
+                                       pr_err("failed to stop array %s: %s\n",
+                                              devname, strerror(errno));
+                               rv = 1;
+                               goto out;
                        }
 
                        /* Give monitor a chance to act */
@@ -248,11 +271,12 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
 
                        fd = open_dev_excl(devnum);
                        if (fd < 0) {
-                               fprintf(stderr, Name
-                                       ": failed to completely stop %s"
-                                       ": Device is busy\n",
-                                       devname);
-                               return 1;
+                               if (verbose >= 0)
+                                       pr_err("failed to completely stop %s"
+                                              ": Device is busy\n",
+                                              devname);
+                               rv = 1;
+                               goto out;
                        }
                } else if (mdi &&
                           mdi->array.major_version == -1 &&
@@ -269,20 +293,18 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
                         * which are members of this array
                         */
                        mds = mdstat_read(0, 0);
-                       for (m=mds; m; m=m->next)
+                       for (m = mds; m; m = m->next)
                                if (m->metadata_version &&
                                    strncmp(m->metadata_version, "external:", 9)==0 &&
                                    is_subarray(m->metadata_version+9) &&
                                    devname2devnum(m->metadata_version+10) == devnum) {
-                                       if (!quiet)
-                                               fprintf(stderr, Name
-                                                       ": Cannot stop container %s: "
-                                                       "member %s still active\n",
-                                                       devname, m->dev);
+                                       if (verbose >= 0)
+                                               pr_err("Cannot stop container %s: "
+                                                      "member %s still active\n",
+                                                      devname, m->dev);
                                        free_mdstat(mds);
-                                       if (mdi)
-                                               sysfs_free(mdi);
-                                       return 1;
+                                       rv = 1;
+                                       goto out;
                                }
                }
 
@@ -290,7 +312,7 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
                 * which blocks STOP_ARRAY is probably a transient use,
                 * so it is reasonable to retry for a while - 5 seconds.
                 */
-               count = 25;
+               count = 25; err = 0;
                while (count && fd >= 0
                       && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
                       && errno == EBUSY) {
@@ -298,18 +320,16 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
                        count --;
                }
                if (fd >= 0 && err) {
-                       if (quiet == 0) {
-                               fprintf(stderr, Name
-                                       ": failed to stop array %s: %s\n",
-                                       devname, strerror(errno));
+                       if (verbose >= 0) {
+                               pr_err("failed to stop array %s: %s\n",
+                                      devname, strerror(errno));
                                if (errno == EBUSY)
                                        fprintf(stderr, "Perhaps a running "
                                                "process, mounted filesystem "
                                                "or active volume group?\n");
                        }
-                       if (mdi)
-                               sysfs_free(mdi);
-                       return 1;
+                       rv = 1;
+                       goto out;
                }
                /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
                 * was stopped, so We'll do it here just to be sure.  Drop any
@@ -320,7 +340,6 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
                if (mdi)
                        sysfs_uevent(mdi, "change");
 
-               
                if (devnum != NoMdDev &&
                    (stat("/dev/.udev", &stb) != 0 ||
                     check_env("MDADM_NO_UDEV"))) {
@@ -328,46 +347,624 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
                        remove_devices(devnum, mp ? mp->path : NULL);
                }
 
-
-               if (quiet <= 0)
-                       fprintf(stderr, Name ": stopped %s\n", devname);
+               if (verbose >= 0)
+                       pr_err("stopped %s\n", devname);
                map_lock(&map);
                map_remove(&map, devnum);
                map_unlock(&map);
+       out:
+               if (mdi)
+                       sysfs_free(mdi);
        }
-       return 0;
+       return rv;
 }
 
-int Manage_resize(char *devname, int fd, long long size, int raid_disks)
+static void add_faulty(struct mddev_dev *dv, int fd, char disp)
 {
-       mdu_array_info_t info;
-       if (ioctl(fd, GET_ARRAY_INFO, &info) != 0) {
-               fprintf(stderr, Name ": Cannot get array information for %s: %s\n",
-                       devname, strerror(errno));
-               return 1;
+       mdu_array_info_t array;
+       mdu_disk_info_t disk;
+       int remaining_disks;
+       int i;
+
+       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
+               return;
+
+       remaining_disks = array.nr_disks;
+       for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
+               struct mddev_dev *new;
+               char buf[40];
+               disk.number = i;
+               if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+                       continue;
+               if (disk.major == 0 && disk.minor == 0)
+                       continue;
+               remaining_disks--;
+               if ((disk.state & 1) == 0) /* not faulty */
+                       continue;
+               sprintf(buf, "%d:%d", disk.major, disk.minor);
+               new = xmalloc(sizeof(*new));
+               new->devname = xstrdup(buf);
+               new->disposition = disp;
+               new->next = dv->next;
+               dv->next = new;
+               dv = new;
        }
-       if (size >= 0)
-               info.size = size;
-       if (raid_disks > 0)
-               info.raid_disks = raid_disks;
-       if (ioctl(fd, SET_ARRAY_INFO, &info) != 0) {
-               fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
-                       devname, strerror(errno));
-               return 1;
+}
+
+static void add_detached(struct mddev_dev *dv, int fd, char disp)
+{
+       mdu_array_info_t array;
+       mdu_disk_info_t disk;
+       int remaining_disks;
+       int i;
+
+       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
+               return;
+
+       remaining_disks = array.nr_disks;
+       for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
+               struct mddev_dev *new;
+               char buf[40];
+               int sfd;
+               disk.number = i;
+               if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+                       continue;
+               if (disk.major == 0 && disk.minor == 0)
+                       continue;
+               remaining_disks--;
+               if (disp == 'f' && (disk.state & 1) != 0) /* already faulty */
+                       continue;
+               sprintf(buf, "%d:%d", disk.major, disk.minor);
+               sfd = dev_open(buf, O_RDONLY);
+               if (sfd >= 0) {
+                       /* Not detached */
+                       close(sfd);
+                       continue;
+               }
+               if (errno != ENXIO)
+                       /* Probably not detached */
+                       continue;
+               new = xmalloc(sizeof(*new));
+               new->devname = xstrdup(buf);
+               new->disposition = disp;
+               new->next = dv->next;
+               dv->next = new;
+               dv = new;
        }
+}
+
+int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
+                  struct supertype *dev_st, struct supertype *tst,
+                  unsigned long rdev,
+                  char *update, char *devname, int verbose,
+                  mdu_array_info_t *array)
+{
+       struct mdinfo mdi;
+       int duuid[4];
+       int ouuid[4];
+
+       dev_st->ss->getinfo_super(dev_st, &mdi, NULL);
+       dev_st->ss->uuid_from_super(dev_st, ouuid);
+       if (tst->sb)
+               tst->ss->uuid_from_super(tst, duuid);
+       else
+               /* Assume uuid matches: kernel will check */
+               memcpy(duuid, ouuid, sizeof(ouuid));
+       if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) &&
+           !(mdi.disk.state & (1<<MD_DISK_FAULTY)) &&
+           memcmp(duuid, ouuid, sizeof(ouuid))==0) {
+               /* Looks like it is worth a
+                * try.  Need to make sure
+                * kernel will accept it
+                * though.
+                */
+               mdu_disk_info_t disc;
+               /* re-add doesn't work for version-1 superblocks
+                * before 2.6.18 :-(
+                */
+               if (array->major_version == 1 &&
+                   get_linux_version() <= 2006018)
+                       goto skip_re_add;
+               disc.number = mdi.disk.number;
+               if (ioctl(fd, GET_DISK_INFO, &disc) != 0
+                   || disc.major != 0 || disc.minor != 0
+                       )
+                       goto skip_re_add;
+               disc.major = major(rdev);
+               disc.minor = minor(rdev);
+               disc.number = mdi.disk.number;
+               disc.raid_disk = mdi.disk.raid_disk;
+               disc.state = mdi.disk.state;
+               if (dv->writemostly == 1)
+                       disc.state |= 1 << MD_DISK_WRITEMOSTLY;
+               if (dv->writemostly == 2)
+                       disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
+               remove_partitions(tfd);
+               if (update || dv->writemostly > 0) {
+                       int rv = -1;
+                       tfd = dev_open(dv->devname, O_RDWR);
+                       if (tfd < 0) {
+                               pr_err("failed to open %s for"
+                                      " superblock update during re-add\n", dv->devname);
+                               return -1;
+                       }
+
+                       if (dv->writemostly == 1)
+                               rv = dev_st->ss->update_super(
+                                       dev_st, NULL, "writemostly",
+                                       devname, verbose, 0, NULL);
+                       if (dv->writemostly == 2)
+                               rv = dev_st->ss->update_super(
+                                       dev_st, NULL, "readwrite",
+                                       devname, verbose, 0, NULL);
+                       if (update)
+                               rv = dev_st->ss->update_super(
+                                       dev_st, NULL, update,
+                                       devname, verbose, 0, NULL);
+                       if (rv == 0)
+                               rv = dev_st->ss->store_super(dev_st, tfd);
+                       close(tfd);
+                       if (rv != 0) {
+                               pr_err("failed to update"
+                                      " superblock during re-add\n");
+                               return -1;
+                       }
+               }
+               /* don't even try if disk is marked as faulty */
+               errno = 0;
+               if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
+                       if (verbose >= 0)
+                               pr_err("re-added %s\n", dv->devname);
+                       return 1;
+               }
+               if (errno == ENOMEM || errno == EROFS) {
+                       pr_err("add new device failed for %s: %s\n",
+                              dv->devname, strerror(errno));
+                       if (dv->disposition == 'M')
+                               return 0;
+                       return -1;
+               }
+       }
+skip_re_add:
        return 0;
 }
 
+int Manage_add(int fd, int tfd, struct mddev_dev *dv,
+              struct supertype *tst, mdu_array_info_t *array,
+              int force, int verbose, char *devname,
+              char *update, unsigned long rdev, unsigned long long array_size)
+{
+       unsigned long long ldsize;
+       struct supertype *dev_st = NULL;
+       int j;
+       mdu_disk_info_t disc;
+
+       if (!get_dev_size(tfd, dv->devname, &ldsize)) {
+               if (dv->disposition == 'M')
+                       return 0;
+               else
+                       return -1;
+       }
+
+       if (tst->ss->validate_geometry(
+                   tst, array->level, array->layout,
+                   array->raid_disks, NULL,
+                   ldsize >> 9, INVALID_SECTORS, NULL, NULL, 0) == 0) {
+               if (!force) {
+                       pr_err("%s is larger than %s can "
+                              "effectively use.\n"
+                              "       Add --force is you "
+                              "really want to add this device.\n",
+                              dv->devname, devname);
+                       return -1;
+               }
+               pr_err("%s is larger than %s can "
+                      "effectively use.\n"
+                      "       Adding anyway as --force "
+                      "was given.\n",
+                      dv->devname, devname);
+       }
+       if (!tst->ss->external &&
+           array->major_version == 0 &&
+           md_get_version(fd)%100 < 2) {
+               if (ioctl(fd, HOT_ADD_DISK, rdev)==0) {
+                       if (verbose >= 0)
+                               pr_err("hot added %s\n",
+                                      dv->devname);
+                       return 1;
+               }
+
+               pr_err("hot add failed for %s: %s\n",
+                      dv->devname, strerror(errno));
+               return -1;
+       }
+
+       if (array->not_persistent == 0 || tst->ss->external) {
+
+               /* need to find a sample superblock to copy, and
+                * a spare slot to use.
+                * For 'external' array (well, container based),
+                * We can just load the metadata for the array.
+                */
+               int array_failed;
+               if (tst->sb)
+                       /* already loaded */;
+               else if (tst->ss->external) {
+                       tst->ss->load_container(tst, fd, NULL);
+               } else for (j = 0; j < tst->max_devs; j++) {
+                               char *dev;
+                               int dfd;
+                               disc.number = j;
+                               if (ioctl(fd, GET_DISK_INFO, &disc))
+                                       continue;
+                               if (disc.major==0 && disc.minor==0)
+                                       continue;
+                               if ((disc.state & 4)==0) /* sync */
+                                       continue;
+                               /* Looks like a good device to try */
+                               dev = map_dev(disc.major, disc.minor, 1);
+                               if (!dev)
+                                       continue;
+                               dfd = dev_open(dev, O_RDONLY);
+                               if (dfd < 0)
+                                       continue;
+                               if (tst->ss->load_super(tst, dfd,
+                                                       NULL)) {
+                                       close(dfd);
+                                       continue;
+                               }
+                               close(dfd);
+                               break;
+                       }
+               /* FIXME this is a bad test to be using */
+               if (!tst->sb && dv->disposition != 'a') {
+                       /* we are re-adding a device to a
+                        * completely dead array - have to depend
+                        * on kernel to check
+                        */
+               } else if (!tst->sb) {
+                       pr_err("cannot load array metadata from %s\n", devname);
+                       return -1;
+               }
+
+               /* Make sure device is large enough */
+               if (tst->ss->avail_size(tst, ldsize/512, INVALID_SECTORS) <
+                   array_size) {
+                       if (dv->disposition == 'M')
+                               return 0;
+                       pr_err("%s not large enough to join array\n",
+                              dv->devname);
+                       return -1;
+               }
+
+               /* Possibly this device was recently part of
+                * the array and was temporarily removed, and
+                * is now being re-added.  If so, we can
+                * simply re-add it.
+                */
+
+               if (array->not_persistent==0) {
+                       dev_st = dup_super(tst);
+                       dev_st->ss->load_super(dev_st, tfd, NULL);
+               }
+               if (dev_st && dev_st->sb) {
+                       int rv = attempt_re_add(fd, tfd, dv,
+                                               dev_st, tst,
+                                               rdev,
+                                               update, devname,
+                                               verbose,
+                                               array);
+                       dev_st->ss->free_super(dev_st);
+                       if (rv)
+                               return rv;
+               }
+               if (dv->disposition == 'M') {
+                       if (verbose > 0)
+                               pr_err("--re-add for %s to %s is not possible\n",
+                                      dv->devname, devname);
+                       return 0;
+               }
+               if (dv->disposition == 'A') {
+                       pr_err("--re-add for %s to %s is not possible\n",
+                              dv->devname, devname);
+                       return -1;
+               }
+               if (array->active_disks < array->raid_disks) {
+                       char *avail = xcalloc(array->raid_disks, 1);
+                       int d;
+                       int found = 0;
+
+                       for (d = 0; d < MAX_DISKS && found < array->active_disks; d++) {
+                               disc.number = d;
+                               if (ioctl(fd, GET_DISK_INFO, &disc))
+                                       continue;
+                               if (disc.major == 0 && disc.minor == 0)
+                                       continue;
+                               if (!(disc.state & (1<<MD_DISK_SYNC)))
+                                       continue;
+                               avail[disc.raid_disk] = 1;
+                               found++;
+                       }
+                       array_failed = !enough(array->level, array->raid_disks,
+                                              array->layout, 1, avail);
+               } else
+                       array_failed = 0;
+               if (array_failed) {
+                       pr_err("%s has failed so using --add cannot work and might destroy\n",
+                              devname);
+                       pr_err("data on %s.  You should stop the array and re-assemble it.\n",
+                              dv->devname);
+                       return -1;
+               }
+       } else {
+               /* non-persistent. Must ensure that new drive
+                * is at least array->size big.
+                */
+               if (ldsize/512 < array_size) {
+                       pr_err("%s not large enough to join array\n",
+                              dv->devname);
+                       return -1;
+               }
+       }
+       /* committed to really trying this device now */
+       remove_partitions(tfd);
+
+       /* in 2.6.17 and earlier, version-1 superblocks won't
+        * use the number we write, but will choose a free number.
+        * we must choose the same free number, which requires
+        * starting at 'raid_disks' and counting up
+        */
+       for (j = array->raid_disks; j < tst->max_devs; j++) {
+               disc.number = j;
+               if (ioctl(fd, GET_DISK_INFO, &disc))
+                       break;
+               if (disc.major==0 && disc.minor==0)
+                       break;
+               if (disc.state & 8) /* removed */
+                       break;
+       }
+       disc.major = major(rdev);
+       disc.minor = minor(rdev);
+       disc.number =j;
+       disc.state = 0;
+       if (array->not_persistent==0) {
+               int dfd;
+               if (dv->writemostly == 1)
+                       disc.state |= 1 << MD_DISK_WRITEMOSTLY;
+               dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
+               if (tst->ss->add_to_super(tst, &disc, dfd,
+                                         dv->devname, INVALID_SECTORS))
+                       return -1;
+               if (tst->ss->write_init_super(tst))
+                       return -1;
+       } else if (dv->disposition == 'A') {
+               /*  this had better be raid1.
+                * As we are "--re-add"ing we must find a spare slot
+                * to fill.
+                */
+               char *used = xcalloc(array->raid_disks, 1);
+               for (j = 0; j < tst->max_devs; j++) {
+                       mdu_disk_info_t disc2;
+                       disc2.number = j;
+                       if (ioctl(fd, GET_DISK_INFO, &disc2))
+                               continue;
+                       if (disc2.major==0 && disc2.minor==0)
+                               continue;
+                       if (disc2.state & 8) /* removed */
+                               continue;
+                       if (disc2.raid_disk < 0)
+                               continue;
+                       if (disc2.raid_disk > array->raid_disks)
+                               continue;
+                       used[disc2.raid_disk] = 1;
+               }
+               for (j = 0 ; j < array->raid_disks; j++)
+                       if (!used[j]) {
+                               disc.raid_disk = j;
+                               disc.state |= (1<<MD_DISK_SYNC);
+                               break;
+                       }
+               free(used);
+       }
+       if (dv->writemostly == 1)
+               disc.state |= (1 << MD_DISK_WRITEMOSTLY);
+       if (tst->ss->external) {
+               /* add a disk
+                * to an external metadata container */
+               struct mdinfo new_mdi;
+               struct mdinfo *sra;
+               int container_fd;
+               int devnum = fd2devnum(fd);
+               int dfd;
+
+               container_fd = open_dev_excl(devnum);
+               if (container_fd < 0) {
+                       pr_err("add failed for %s:"
+                              " could not get exclusive access to container\n",
+                              dv->devname);
+                       tst->ss->free_super(tst);
+                       return -1;
+               }
+
+               Kill(dv->devname, NULL, 0, -1, 0);
+               dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
+               if (mdmon_running(tst->container_dev))
+                       tst->update_tail = &tst->updates;
+               if (tst->ss->add_to_super(tst, &disc, dfd,
+                                         dv->devname, INVALID_SECTORS)) {
+                       close(dfd);
+                       close(container_fd);
+                       return -1;
+               }
+               if (tst->update_tail)
+                       flush_metadata_updates(tst);
+               else
+                       tst->ss->sync_metadata(tst);
+
+               sra = sysfs_read(container_fd, -1, 0);
+               if (!sra) {
+                       pr_err("add failed for %s: sysfs_read failed\n",
+                              dv->devname);
+                       close(container_fd);
+                       tst->ss->free_super(tst);
+                       return -1;
+               }
+               sra->array.level = LEVEL_CONTAINER;
+               /* Need to set data_offset and component_size */
+               tst->ss->getinfo_super(tst, &new_mdi, NULL);
+               new_mdi.disk.major = disc.major;
+               new_mdi.disk.minor = disc.minor;
+               new_mdi.recovery_start = 0;
+               /* Make sure fds are closed as they are O_EXCL which
+                * would block add_disk */
+               tst->ss->free_super(tst);
+               if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
+                       pr_err("add new device to external metadata"
+                              " failed for %s\n", dv->devname);
+                       close(container_fd);
+                       sysfs_free(sra);
+                       return -1;
+               }
+               ping_monitor_by_id(devnum);
+               sysfs_free(sra);
+               close(container_fd);
+       } else {
+               tst->ss->free_super(tst);
+               if (ioctl(fd, ADD_NEW_DISK, &disc)) {
+                       pr_err("add new device failed for %s as %d: %s\n",
+                              dv->devname, j, strerror(errno));
+                       return -1;
+               }
+       }
+       if (verbose >= 0)
+               pr_err("added %s\n", dv->devname);
+       return 1;
+}
+
+/*
+ * Manage_remove - hot-remove one device from an array or container.
+ *
+ * tst:     metadata handler for the array; when tst->ss->external is set
+ *          the extra container checks below are performed.
+ * fd:      open descriptor on the md device being managed.
+ * dv:      device being removed; only dv->devname is used, for messages.
+ * sysfd:   if >= 0, an open sysfs file to which "remove" is written
+ *          instead of issuing the HOT_REMOVE_DISK ioctl.
+ * rdev:    device number handed to HOT_REMOVE_DISK (0 if unknown).
+ * verbose: a success message is printed when this is >= 0.
+ * devname: name of the md device, used in the success message.
+ *
+ * Returns 1 when the device was removed and -1 on any failure.
+ */
+int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
+                 int sysfd, unsigned long rdev, int verbose, char *devname)
+{
+       int lfd = -1;
+       int err;
+
+       if (tst->ss->external) {
+               /* To remove a device from a container, we must
+                * check that it isn't in use in an array.
+                * This involves looking in the 'holders'
+                * directory - there must be just one entry,
+                * the container.
+                * To ensure that it doesn't get used as a
+                * hot spare while we are checking, we
+                * get an O_EXCL open on the container
+                */
+               int ret;
+               int dnum = fd2devnum(fd);
+               lfd = open_dev_excl(dnum);
+               if (lfd < 0) {
+                       pr_err("Cannot get exclusive access "
+                              " to container - odd\n");
+                       return -1;
+               }
+               /* We may not be able to check on holders in
+                * sysfs, either because we don't have the dev num
+                * (rdev == 0) or because the device has been detached
+                * and the 'holders' directory no longer exists
+                * (ret == -1).  In that case, assume it is OK to
+                * remove.
+                */
+               if (rdev == 0)
+                       ret = -1;
+               else
+                       ret = sysfs_unique_holder(dnum, rdev);
+               if (ret == 0) {
+                       pr_err("%s is not a member, cannot remove.\n",
+                              dv->devname);
+                       close(lfd);
+                       return -1;
+               }
+               if (ret >= 2) {
+                       pr_err("%s is still in use, cannot remove.\n",
+                              dv->devname);
+                       close(lfd);
+                       return -1;
+               }
+       }
+       /* FIXME check that it is a current member */
+       if (sysfd >= 0) {
+               /* device has been removed and we don't know
+                * the major:minor number
+                */
+               int n = write(sysfd, "remove", 6);
+               if (n != 6)
+                       err = -1;
+               else
+                       err = 0;
+       } else {
+               err = ioctl(fd, HOT_REMOVE_DISK, rdev);
+               if (err && errno == ENODEV) {
+                       /* Old kernels rejected this if no personality
+                        * is registered, so fall back to writing
+                        * "remove" to the member's sysfs 'state' file.
+                        */
+                       struct mdinfo *sra = sysfs_read(fd, 0, GET_DEVS);
+                       /* 'sd' rather than 'dv' so the function
+                        * parameter is not shadowed.
+                        */
+                       struct mdinfo *sd = NULL;
+                       if (sra)
+                               sd = sra->devs;
+                       for ( ; sd ; sd = sd->next)
+                               if (sd->disk.major == (int)major(rdev) &&
+                                   sd->disk.minor == (int)minor(rdev))
+                                       break;
+                       if (sd)
+                               err = sysfs_set_str(sra, sd,
+                                                   "state", "remove");
+                       else
+                               err = -1;
+                       if (sra)
+                               sysfs_free(sra);
+               }
+       }
+       if (err) {
+               pr_err("hot remove failed "
+                      "for %s: %s\n",  dv->devname,
+                      strerror(errno));
+               if (lfd >= 0)
+                       close(lfd);
+               return -1;
+       }
+       if (tst->ss->external) {
+               /*
+                * Before dropping our exclusive open we make an
+                * attempt at preventing mdmon from seeing an
+                * 'add' event before reconciling this 'remove'
+                * event.
+                */
+               char *name = devnum2devname(fd2devnum(fd));
+
+               if (!name) {
+                       pr_err("unable to get container name\n");
+                       /* Don't leak the exclusive container open
+                        * taken above on this error path.
+                        */
+                       if (lfd >= 0)
+                               close(lfd);
+                       return -1;
+               }
+
+               ping_manager(name);
+               free(name);
+       }
+       if (lfd >= 0)
+               close(lfd);
+       if (verbose >= 0)
+               pr_err("hot removed %s from %s\n",
+                      dv->devname, devname);
+       return 1;
+}
+
 int Manage_subdevs(char *devname, int fd,
                   struct mddev_dev *devlist, int verbose, int test,
-                  char *update)
+                  char *update, int force)
 {
-       /* do something to each dev.
+       /* Do something to each dev.
         * devmode can be
         *  'a' - add the device
         *         try HOT_ADD_DISK
         *         If that fails EINVAL, try ADD_NEW_DISK
-        *  'r' - remove the device HOT_REMOVE_DISK
+        *  'A' - re-add the device
+        *  'r' - remove the device: HOT_REMOVE_DISK
         *        device can be 'faulty' or 'detached' in which case all
         *        matching devices are removed.
         *  'f' - set the device faulty SET_DISK_FAULTY
@@ -376,29 +973,26 @@ int Manage_subdevs(char *devname, int fd,
         * For 'f' and 'r', the device can also be a kernel-internal
         * name such as 'sdb'.
         */
-       struct mddev_dev *add_devlist = NULL;
        mdu_array_info_t array;
-       mdu_disk_info_t disc;
        unsigned long long array_size;
-       struct mddev_dev *dv, *next = NULL;
+       struct mddev_dev *dv;
        struct stat stb;
-       int j, jnext = 0;
        int tfd = -1;
-       struct supertype *st, *tst;
+       struct supertype *tst;
        char *subarray = NULL;
-       int duuid[4];
-       int ouuid[4];
-       int lfd = -1;
        int sysfd = -1;
        int count = 0; /* number of actions taken */
+       struct mdinfo info;
+       int frozen = 0;
 
        if (ioctl(fd, GET_ARRAY_INFO, &array)) {
-               fprintf(stderr, Name ": cannot get array info for %s\n",
+               pr_err("Cannot get array info for %s\n",
                        devname);
-               return 1;
+               goto abort;
        }
+       sysfs_init(&info, fd, 0);
 
-       /* array.size is only 32 bit and may be truncated.
+       /* array.size is only 32 bits and may be truncated.
         * So read from sysfs if possible, and record number of sectors
         */
 
@@ -408,124 +1002,69 @@ int Manage_subdevs(char *devname, int fd,
 
        tst = super_by_fd(fd, &subarray);
        if (!tst) {
-               fprintf(stderr, Name ": unsupport array - version %d.%d\n",
+               pr_err("unsupport array - version %d.%d\n",
                        array.major_version, array.minor_version);
-               return 1;
+               goto abort;
        }
 
        stb.st_rdev = 0;
-       for (dv = devlist, j=0 ; dv; dv = next, j = jnext) {
-               unsigned long long ldsize;
-               char dvname[20];
-               char *dnprintable = dv->devname;
-               char *add_dev = dv->devname;
-               int err;
-               int re_add_failed = 0;
+       for (dv = devlist; dv; dv = dv->next) {
+               int rv;
 
-               next = dv->next;
-               jnext = 0;
-
-               if (strcmp(dv->devname, "failed")==0 ||
-                   strcmp(dv->devname, "faulty")==0) {
+               if (strcmp(dv->devname, "failed") == 0 ||
+                   strcmp(dv->devname, "faulty") == 0) {
                        if (dv->disposition != 'r') {
-                               fprintf(stderr, Name ": %s only meaningful "
+                               pr_err("%s only meaningful "
                                        "with -r, not -%c\n",
                                        dv->devname, dv->disposition);
-                               return 1;
+                               goto abort;
                        }
-                       for (; j < array.raid_disks + array.nr_disks ; j++) {
-                               unsigned dev;
-                               disc.number = j;
-                               if (ioctl(fd, GET_DISK_INFO, &disc))
-                                       continue;
-                               if (disc.major == 0 && disc.minor == 0)
-                                       continue;
-                               if ((disc.state & 1) == 0) /* faulty */
-                                       continue;
-                               dev = makedev(disc.major, disc.minor);
-                               if (stb.st_rdev == dev)
-                                       /* already did that one */
-                                       continue;
-                               stb.st_rdev = dev;
-                               next = dv;
-                               /* same slot again next time - things might
-                                * have reshuffled */
-                               jnext = j;
-                               sprintf(dvname,"%d:%d", disc.major, disc.minor);
-                               dnprintable = dvname;
-                               break;
-                       }
-                       if (next != dv)
-                               continue;
-               } else if (strcmp(dv->devname, "detached") == 0) {
+                       add_faulty(dv, fd, 'r');
+                       continue;
+               }
+               if (strcmp(dv->devname, "detached") == 0) {
                        if (dv->disposition != 'r' && dv->disposition != 'f') {
-                               fprintf(stderr, Name ": %s only meaningful "
+                               pr_err("%s only meaningful "
                                        "with -r of -f, not -%c\n",
                                        dv->devname, dv->disposition);
-                               return 1;
+                               goto abort;
                        }
-                       for (; j < array.raid_disks + array.nr_disks; j++) {
-                               int sfd;
-                               unsigned dev;
-                               disc.number = j;
-                               if (ioctl(fd, GET_DISK_INFO, &disc))
-                                       continue;
-                               if (disc.major == 0 && disc.minor == 0)
-                                       continue;
-                               sprintf(dvname,"%d:%d", disc.major, disc.minor);
-                               sfd = dev_open(dvname, O_RDONLY);
-                               if (sfd >= 0) {
-                                       close(sfd);
-                                       continue;
-                               }
-                               if (dv->disposition == 'f' &&
-                                   (disc.state & 1) == 1) /* already faulty */
-                                       continue;
-                               if (errno != ENXIO)
-                                       continue;
-                               dev = makedev(disc.major, disc.minor);
-                               if (stb.st_rdev == dev)
-                                       /* already did that one */
-                                       continue;
-                               stb.st_rdev = dev;
-                               next = dv;
-                               /* same slot again next time - things might
-                                * have reshuffled */
-                               jnext = j;
-                               dnprintable = dvname;
-                               break;
-                       }
-                       if (next != dv)
-                               continue;
-               } else if (strcmp(dv->devname, "missing") == 0) {
-                       if (dv->disposition != 'a' || dv->re_add == 0) {
-                               fprintf(stderr, Name ": 'missing' only meaningful "
-                                       "with --re-add\n");
-                               return 1;
+                       add_detached(dv, fd, dv->disposition);
+                       continue;
+               }
+
+               if (strcmp(dv->devname, "missing") == 0) {
+                       struct mddev_dev *add_devlist = NULL;
+                       struct mddev_dev **dp;
+                       if (dv->disposition != 'A') {
+                               pr_err("'missing' only meaningful "
+                                      "with --re-add\n");
+                               goto abort;
                        }
-                       if (add_devlist == NULL)
-                               add_devlist = conf_get_devs();
+                       add_devlist = conf_get_devs();
                        if (add_devlist == NULL) {
-                               fprintf(stderr, Name ": no devices to scan for missing members.");
+                               pr_err("no devices to scan for missing members.");
                                continue;
                        }
-                       add_dev = add_devlist->devname;
-                       add_devlist = add_devlist->next;
-                       if (add_devlist != NULL)
-                               next = dv;
-                       if (stat(add_dev, &stb) < 0)
-                               continue;
-               } else if (strchr(dv->devname, '/') == NULL &&
-                          strchr(dv->devname, ':') == NULL &&
-                          strlen(dv->devname) < 50) {
+                       for (dp = &add_devlist; *dp; dp = & (*dp)->next)
+                               /* 'M' (for 'missing') is like 'A' without errors */
+                               (*dp)->disposition = 'M';
+                       *dp = dv->next;
+                       dv->next = add_devlist;
+                       continue;
+               }
+
+               if (strchr(dv->devname, '/') == NULL &&
+                   strchr(dv->devname, ':') == NULL &&
+                   strlen(dv->devname) < 50) {
                        /* Assume this is a kernel-internal name like 'sda1' */
                        int found = 0;
                        char dname[55];
                        if (dv->disposition != 'r' && dv->disposition != 'f') {
-                               fprintf(stderr, Name ": %s only meaningful "
+                               pr_err("%s only meaningful "
                                        "with -r or -f, not -%c\n",
                                        dv->devname, dv->disposition);
-                               return 1;
+                               goto abort;
                        }
 
                        sprintf(dname, "dev-%s", dv->devname);
@@ -544,15 +1083,13 @@ int Manage_subdevs(char *devname, int fd,
                        if (!found) {
                                sysfd = sysfs_open(fd2devnum(fd), dname, "state");
                                if (sysfd < 0) {
-                                       fprintf(stderr, Name ": %s does not appear "
+                                       pr_err("%s does not appear "
                                                "to be a component of %s\n",
                                                dv->devname, devname);
-                                       return 1;
+                                       goto abort;
                                }
                        }
                } else {
-                       j = 0;
-
                        tfd = dev_open(dv->devname, O_RDONLY);
                        if (tfd < 0 && dv->disposition == 'r' &&
                            lstat(dv->devname, &stb) == 0)
@@ -562,504 +1099,95 @@ int Manage_subdevs(char *devname, int fd,
                                ;
                        else {
                                if (tfd < 0 || fstat(tfd, &stb) != 0) {
-                                       fprintf(stderr, Name ": cannot find %s: %s\n",
-                                               dv->devname, strerror(errno));
                                        if (tfd >= 0)
                                                close(tfd);
-                                       return 1;
+                                       if (dv->disposition == 'M')
+                                               /* non-fatal */
+                                               continue;
+                                       pr_err("cannot find %s: %s\n",
+                                               dv->devname, strerror(errno));
+                                       goto abort;
                                }
                                close(tfd);
                                tfd = -1;
                        }
                        if ((stb.st_mode & S_IFMT) != S_IFBLK) {
-                               fprintf(stderr, Name ": %s is not a "
+                               if (dv->disposition == 'M')
+                                       /* non-fatal. Also improbable */
+                                       continue;
+                               pr_err("%s is not a "
                                        "block device.\n",
                                        dv->devname);
-                               return 1;
+                               goto abort;
                        }
                }
                switch(dv->disposition){
                default:
-                       fprintf(stderr, Name ": internal error - devmode[%s]=%d\n",
+                       pr_err("internal error - devmode[%s]=%d\n",
                                dv->devname, dv->disposition);
-                       return 1;
+                       goto abort;
                case 'a':
+               case 'A':
+               case 'M':
                        /* add the device */
                        if (subarray) {
-                               fprintf(stderr, Name ": Cannot add disks to a"
+                               pr_err("Cannot add disks to a"
                                        " \'member\' array, perform this"
                                        " operation on the parent container\n");
-                               return 1;
+                               goto abort;
                        }
                        /* Make sure it isn't in use (in 2.6 or later) */
-                       tfd = dev_open(add_dev, O_RDONLY|O_EXCL|O_DIRECT);
-                       if (tfd < 0 && add_dev != dv->devname)
-                               continue;
-                       if (tfd < 0) {
-                               fprintf(stderr, Name ": Cannot open %s: %s\n",
-                                       dv->devname, strerror(errno));
-                               return 1;
-                       }
-
-                       st = dup_super(tst);
-
-                       if (array.not_persistent==0)
-                               st->ss->load_super(st, tfd, NULL);
-
-                       if (add_dev == dv->devname) {
-                               if (!get_dev_size(tfd, dv->devname, &ldsize)) {
-                                       close(tfd);
-                                       return 1;
-                               }
-                       } else if (!get_dev_size(tfd, NULL, &ldsize)) {
-                               close(tfd);
-                               tfd = -1;
-                               continue;
-                       }
-
-                       if (!tst->ss->external &&
-                           array.major_version == 0 &&
-                           md_get_version(fd)%100 < 2) {
-                               close(tfd);
-                               tfd = -1;
-                               if (ioctl(fd, HOT_ADD_DISK,
-                                         (unsigned long)stb.st_rdev)==0) {
-                                       if (verbose >= 0)
-                                               fprintf(stderr, Name ": hot added %s\n",
-                                                       add_dev);
-                                       continue;
-                               }
-
-                               fprintf(stderr, Name ": hot add failed for %s: %s\n",
-                                       add_dev, strerror(errno));
-                               return 1;
-                       }
-
-                       if (array.not_persistent == 0 || tst->ss->external) {
-
-                               /* need to find a sample superblock to copy, and
-                                * a spare slot to use.
-                                * For 'external' array (well, container based),
-                                * We can just load the metadata for the array.
-                                */
-                               if (tst->sb)
-                                       /* already loaded */;
-                               else if (tst->ss->external) {
-                                       tst->ss->load_container(tst, fd, NULL);
-                               } else for (j = 0; j < tst->max_devs; j++) {
-                                       char *dev;
-                                       int dfd;
-                                       disc.number = j;
-                                       if (ioctl(fd, GET_DISK_INFO, &disc))
-                                               continue;
-                                       if (disc.major==0 && disc.minor==0)
-                                               continue;
-                                       if ((disc.state & 4)==0) continue; /* sync */
-                                       /* Looks like a good device to try */
-                                       dev = map_dev(disc.major, disc.minor, 1);
-                                       if (!dev) continue;
-                                       dfd = dev_open(dev, O_RDONLY);
-                                       if (dfd < 0) continue;
-                                       if (tst->ss->load_super(tst, dfd,
-                                                               NULL)) {
-                                               close(dfd);
-                                               continue;
-                                       }
-                                       close(dfd);
-                                       break;
-                               }
-                               /* FIXME this is a bad test to be using */
-                               if (!tst->sb) {
-                                       close(tfd);
-                                       fprintf(stderr, Name ": cannot load array metadata from %s\n", devname);
-                                       return 1;
-                               }
-
-                               /* Make sure device is large enough */
-                               if (tst->ss->avail_size(tst, ldsize/512) <
-                                   array_size) {
-                                       close(tfd);
-                                       tfd = -1;
-                                       if (add_dev != dv->devname)
-                                               continue;
-                                       fprintf(stderr, Name ": %s not large enough to join array\n",
-                                               dv->devname);
-                                       return 1;
-                               }
-
-                               /* Possibly this device was recently part of the array
-                                * and was temporarily removed, and is now being re-added.
-                                * If so, we can simply re-add it.
-                                */
-                               tst->ss->uuid_from_super(tst, duuid);
-
-                               /* re-add doesn't work for version-1 superblocks
-                                * before 2.6.18 :-(
-                                */
-                               if (array.major_version == 1 &&
-                                   get_linux_version() <= 2006018)
-                                       ;
-                               else if (st->sb) {
-                                       struct mdinfo mdi;
-                                       st->ss->getinfo_super(st, &mdi, NULL);
-                                       st->ss->uuid_from_super(st, ouuid);
-                                       if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) &&
-                                           !(mdi.disk.state & (1<<MD_DISK_FAULTY)) &&
-                                           memcmp(duuid, ouuid, sizeof(ouuid))==0) {
-                                               /* look like it is worth a try.  Need to
-                                                * make sure kernel will accept it though.
-                                                */
-                                               disc.number = mdi.disk.number;
-                                               if (ioctl(fd, GET_DISK_INFO, &disc) != 0
-                                                   || disc.major != 0 || disc.minor != 0
-                                                   || !enough_fd(fd))
-                                                       goto skip_re_add;
-                                               disc.major = major(stb.st_rdev);
-                                               disc.minor = minor(stb.st_rdev);
-                                               disc.number = mdi.disk.number;
-                                               disc.raid_disk = mdi.disk.raid_disk;
-                                               disc.state = mdi.disk.state;
-                                               if (dv->writemostly == 1)
-                                                       disc.state |= 1 << MD_DISK_WRITEMOSTLY;
-                                               if (dv->writemostly == 2)
-                                                       disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
-                                               remove_partitions(tfd);
-                                               close(tfd);
-                                               tfd = -1;
-                                               if (update) {
-                                                       int rv = -1;
-                                                       tfd = dev_open(dv->devname, O_RDWR);
-
-                                                       if (tfd >= 0)
-                                                               rv = st->ss->update_super(
-                                                                       st, NULL, update,
-                                                                       devname, verbose, 0, NULL);
-                                                       if (rv == 0)
-                                                               rv = tst->ss->store_super(st, tfd);
-                                                       close(tfd);
-                                                       tfd = -1;
-                                                       if (rv != 0) {
-                                                               fprintf(stderr, Name ": failed to update"
-                                                                       " superblock during re-add\n");
-                                                               return 1;
-                                                       }
-                                               }
-                                               /* don't even try if disk is marked as faulty */
-                                               errno = 0;
-                                               if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
-                                                       if (verbose >= 0)
-                                                               fprintf(stderr, Name ": re-added %s\n", add_dev);
-                                                       count++;
-                                                       continue;
-                                               }
-                                               if (errno == ENOMEM || errno == EROFS) {
-                                                       fprintf(stderr, Name ": add new device failed for %s: %s\n",
-                                                               add_dev, strerror(errno));
-                                                       if (add_dev != dv->devname)
-                                                               continue;
-                                                       return 1;
-                                               }
-                                       skip_re_add:
-                                               re_add_failed = 1;
-                                       }
-                                       st->ss->free_super(st);
-                               }
-                               if (add_dev != dv->devname) {
-                                       if (verbose > 0)
-                                               fprintf(stderr, Name
-                                                       ": --re-add for %s to %s is not possible\n",
-                                                       add_dev, devname);
-                                       if (tfd >= 0) {
-                                               close(tfd);
-                                               tfd = -1;
-                                       }
-                                       continue;
-                               }
-                               if (dv->re_add) {
-                                       if (tfd >= 0)
-                                               close(tfd);
-                                       fprintf(stderr, Name
-                                               ": --re-add for %s to %s is not possible\n",
-                                               dv->devname, devname);
-                                       return 1;
-                               }
-                               if (re_add_failed) {
-                                       fprintf(stderr, Name ": %s reports being an active member for %s, but a --re-add fails.\n",
-                                               dv->devname, devname);
-                                       fprintf(stderr, Name ": not performing --add as that would convert %s in to a spare.\n",
-                                               dv->devname);
-                                       fprintf(stderr, Name ": To make this a spare, use \"mdadm --zero-superblock %s\" first.\n",     
-                                               dv->devname);
-                                       if (tfd >= 0)
-                                               close(tfd);
-                                       return 1;
-                               }
-                       } else {
-                               /* non-persistent. Must ensure that new drive
-                                * is at least array.size big.
-                                */
-                               if (ldsize/512 < array_size) {
-                                       fprintf(stderr, Name ": %s not large enough to join array\n",
-                                               dv->devname);
-                                       if (tfd >= 0)
-                                               close(tfd);
-                                       return 1;
-                               }
-                       }
-                       /* committed to really trying this device now*/
+                       tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
                        if (tfd >= 0) {
-                               remove_partitions(tfd);
-                               close(tfd);
-                               tfd = -1;
-                       }
-                       /* in 2.6.17 and earlier, version-1 superblocks won't
-                        * use the number we write, but will choose a free number.
-                        * we must choose the same free number, which requires
-                        * starting at 'raid_disks' and counting up
-                        */
-                       for (j = array.raid_disks; j< tst->max_devs; j++) {
-                               disc.number = j;
-                               if (ioctl(fd, GET_DISK_INFO, &disc))
-                                       break;
-                               if (disc.major==0 && disc.minor==0)
-                                       break;
-                               if (disc.state & 8) /* removed */
-                                       break;
-                       }
-                       disc.major = major(stb.st_rdev);
-                       disc.minor = minor(stb.st_rdev);
-                       disc.number =j;
-                       disc.state = 0;
-                       if (array.not_persistent==0) {
-                               int dfd;
-                               if (dv->writemostly == 1)
-                                       disc.state |= 1 << MD_DISK_WRITEMOSTLY;
-                               dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
-                               if (tst->ss->add_to_super(tst, &disc, dfd,
-                                                         dv->devname)) {
-                                       close(dfd);
-                                       return 1;
-                               }
-                               if (tst->ss->write_init_super(tst)) {
-                                       close(dfd);
-                                       return 1;
-                               }
-                       } else if (dv->re_add) {
-                               /*  this had better be raid1.
-                                * As we are "--re-add"ing we must find a spare slot
-                                * to fill.
+                               /* We know no-one else is using it.  We'll
+                                * need non-exclusive access to add it, so
+                                * do that now.
                                 */
-                               char *used = malloc(array.raid_disks);
-                               memset(used, 0, array.raid_disks);
-                               for (j=0; j< tst->max_devs; j++) {
-                                       mdu_disk_info_t disc2;
-                                       disc2.number = j;
-                                       if (ioctl(fd, GET_DISK_INFO, &disc2))
-                                               continue;
-                                       if (disc2.major==0 && disc2.minor==0)
-                                               continue;
-                                       if (disc2.state & 8) /* removed */
-                                               continue;
-                                       if (disc2.raid_disk < 0)
-                                               continue;
-                                       if (disc2.raid_disk > array.raid_disks)
-                                               continue;
-                                       used[disc2.raid_disk] = 1;
-                               }
-                               for (j=0 ; j<array.raid_disks; j++)
-                                       if (!used[j]) {
-                                               disc.raid_disk = j;
-                                               disc.state |= (1<<MD_DISK_SYNC);
-                                               break;
-                                       }
-                               free(used);
+                               close(tfd);
+                               tfd = dev_open(dv->devname, O_RDONLY);
+                       }                               
+                       if (tfd < 0) {
+                               if (dv->disposition == 'M')
+                                       continue;
+                               pr_err("Cannot open %s: %s\n",
+                                       dv->devname, strerror(errno));
+                               goto abort;
                        }
-                       if (dv->writemostly == 1)
-                               disc.state |= (1 << MD_DISK_WRITEMOSTLY);
-                       if (tst->ss->external) {
-                               /* add a disk
-                                * to an external metadata container */
-                               struct mdinfo new_mdi;
-                               struct mdinfo *sra;
-                               int container_fd;
-                               int devnum = fd2devnum(fd);
-                               int dfd;
-
-                               container_fd = open_dev_excl(devnum);
-                               if (container_fd < 0) {
-                                       fprintf(stderr, Name ": add failed for %s:"
-                                               " could not get exclusive access to container\n",
-                                               dv->devname);
-                                       tst->ss->free_super(tst);
-                                       return 1;
-                               }
-
-                               dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
-                               if (mdmon_running(tst->container_dev))
-                                       tst->update_tail = &tst->updates;
-                               if (tst->ss->add_to_super(tst, &disc, dfd,
-                                                         dv->devname)) {
-                                       close(dfd);
-                                       close(container_fd);
-                                       return 1;
-                               }
-                               if (st->update_tail)
-                                       flush_metadata_updates(st);
+                       if (!frozen) {
+                               if (sysfs_freeze_array(&info) == 1)
+                                       frozen = 1;
                                else
-                                       tst->ss->sync_metadata(st);
-
-                               sra = sysfs_read(container_fd, -1, 0);
-                               if (!sra) {
-                                       fprintf(stderr, Name ": add failed for %s: sysfs_read failed\n",
-                                               dv->devname);
-                                       close(container_fd);
-                                       tst->ss->free_super(tst);
-                                       return 1;
-                               }
-                               sra->array.level = LEVEL_CONTAINER;
-                               /* Need to set data_offset and component_size */
-                               tst->ss->getinfo_super(tst, &new_mdi, NULL);
-                               new_mdi.disk.major = disc.major;
-                               new_mdi.disk.minor = disc.minor;
-                               new_mdi.recovery_start = 0;
-                               /* Make sure fds are closed as they are O_EXCL which
-                                * would block add_disk */
-                               tst->ss->free_super(tst);
-                               if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
-                                       fprintf(stderr, Name ": add new device to external metadata"
-                                               " failed for %s\n", dv->devname);
-                                       close(container_fd);
-                                       sysfs_free(sra);
-                                       return 1;
-                               }
-                               ping_monitor(devnum2devname(devnum));
-                               sysfs_free(sra);
-                               close(container_fd);
-                       } else {
-                               tst->ss->free_super(tst);
-                               if (ioctl(fd, ADD_NEW_DISK, &disc)) {
-                                       fprintf(stderr, Name ": add new device failed for %s as %d: %s\n",
-                                               dv->devname, j, strerror(errno));
-                                       return 1;
-                               }
+                                       frozen = -1;
                        }
-                       if (verbose >= 0)
-                               fprintf(stderr, Name ": added %s\n", dv->devname);
+                       rv = Manage_add(fd, tfd, dv, tst, &array,
+                                       force, verbose, devname, update,
+                                       stb.st_rdev, array_size);
+                       close(tfd);
+                       tfd = -1;
+                       if (rv < 0)
+                               goto abort;
+                       if (rv > 0)
+                               count++;
                        break;
 
                case 'r':
                        /* hot remove */
                        if (subarray) {
-                               fprintf(stderr, Name ": Cannot remove disks from a"
+                               pr_err("Cannot remove disks from a"
                                        " \'member\' array, perform this"
                                        " operation on the parent container\n");
-                               if (sysfd >= 0)
-                                       close(sysfd);
-                               return 1;
-                       }
-                       if (tst->ss->external) {
-                               /* To remove a device from a container, we must
-                                * check that it isn't in use in an array.
-                                * This involves looking in the 'holders'
-                                * directory - there must be just one entry,
-                                * the container.
-                                * To ensure that it doesn't get used as a
-                                * hold spare while we are checking, we
-                                * get an O_EXCL open on the container
-                                */
-                               int dnum = fd2devnum(fd);
-                               lfd = open_dev_excl(dnum);
-                               if (lfd < 0) {
-                                       fprintf(stderr, Name
-                                               ": Cannot get exclusive access "
-                                               " to container - odd\n");
-                                       if (sysfd >= 0)
-                                               close(sysfd);
-                                       return 1;
-                               }
-                               /* in the detached case it is not possible to
-                                * check if we are the unique holder, so just
-                                * rely on the 'detached' checks
-                                */
-                               if (strcmp(dv->devname, "detached") == 0 ||
-                                   sysfd >= 0 ||
-                                   sysfs_unique_holder(dnum, stb.st_rdev))
-                                       /* pass */;
-                               else {
-                                       fprintf(stderr, Name
-                                               ": %s is %s, cannot remove.\n",
-                                               dnprintable,
-                                               errno == EEXIST ? "still in use":
-                                               "not a member");
-                                       close(lfd);
-                                       return 1;
-                               }
-                       }
-                       /* FIXME check that it is a current member */
-                       if (sysfd >= 0) {
-                               /* device has been removed and we don't know
-                                * the major:minor number
-                                */
-                               int n = write(sysfd, "remove", 6);
-                               if (n != 6)
-                                       err = -1;
-                               else
-                                       err = 0;
+                               rv = -1;
+                       } else
+                               rv = Manage_remove(tst, fd, dv, sysfd,
+                                                  stb.st_rdev, verbose,
+                                                  devname);
+                       if (sysfd >= 0)
                                close(sysfd);
-                               sysfd = -1;
-                       } else {
-                               err = ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev);
-                               if (err && errno == ENODEV) {
-                                       /* Old kernels rejected this if no personality
-                                        * registered */
-                                       struct mdinfo *sra = sysfs_read(fd, 0, GET_DEVS);
-                                       struct mdinfo *dv = NULL;
-                                       if (sra)
-                                               dv = sra->devs;
-                                       for ( ; dv ; dv=dv->next)
-                                               if (dv->disk.major == (int)major(stb.st_rdev) &&
-                                                   dv->disk.minor == (int)minor(stb.st_rdev))
-                                                       break;
-                                       if (dv)
-                                               err = sysfs_set_str(sra, dv,
-                                                                   "state", "remove");
-                                       else
-                                               err = -1;
-                                       if (sra)
-                                               sysfs_free(sra);
-                               }
-                       }
-                       if (err) {
-                               fprintf(stderr, Name ": hot remove failed "
-                                       "for %s: %s\n", dnprintable,
-                                       strerror(errno));
-                               if (lfd >= 0)
-                                       close(lfd);
-                               return 1;
-                       }
-                       if (tst->ss->external) {
-                               /*
-                                * Before dropping our exclusive open we make an
-                                * attempt at preventing mdmon from seeing an
-                                * 'add' event before reconciling this 'remove'
-                                * event.
-                                */
-                               char *name = devnum2devname(fd2devnum(fd));
-
-                               if (!name) {
-                                       fprintf(stderr, Name ": unable to get container name\n");
-                                       return 1;
-                               }
-
-                               ping_manager(name);
-                               free(name);
-                       }
-                       if (lfd >= 0)
-                               close(lfd);
-                       count++;
-                       if (verbose >= 0)
-                               fprintf(stderr, Name ": hot removed %s from %s\n",
-                                       dnprintable, devname);
+                       sysfd = -1;
+                       if (rv < 0)
+                               goto abort;
+                       if (rv > 0)
+                               count++;
                        break;
 
                case 'f': /* set faulty */
@@ -1067,25 +1195,32 @@ int Manage_subdevs(char *devname, int fd,
                        if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) ||
                            (sysfd < 0 && ioctl(fd, SET_DISK_FAULTY,
                                                (unsigned long) stb.st_rdev))) {
-                               fprintf(stderr, Name ": set device faulty failed for %s:  %s\n",
-                                       dnprintable, strerror(errno));
+                               pr_err("set device faulty failed for %s:  %s\n",
+                                       dv->devname, strerror(errno));
                                if (sysfd >= 0)
                                        close(sysfd);
-                               return 1;
+                               goto abort;
                        }
                        if (sysfd >= 0)
                                close(sysfd);
                        sysfd = -1;
                        count++;
                        if (verbose >= 0)
-                               fprintf(stderr, Name ": set %s faulty in %s\n",
-                                       dnprintable, devname);
+                               pr_err("set %s faulty in %s\n",
+                                       dv->devname, devname);
                        break;
                }
        }
+       if (frozen > 0)
+               sysfs_set_str(&info, NULL, "sync_action","idle");
        if (test && count == 0)
                return 2;
        return 0;
+
+abort:
+       if (frozen > 0)
+               sysfs_set_str(&info, NULL, "sync_action","idle");
+       return 1;
 }
 
 int autodetect(void)
@@ -1101,22 +1236,21 @@ int autodetect(void)
        return rv;
 }
 
-int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int quiet)
+int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int verbose)
 {
        struct supertype supertype, *st = &supertype;
        int fd, rv = 2;
 
        memset(st, 0, sizeof(*st));
 
-       fd = open_subarray(dev, subarray, st, quiet);
+       fd = open_subarray(dev, subarray, st, verbose < 0);
        if (fd < 0)
                return 2;
 
        if (!st->ss->update_subarray) {
-               if (!quiet)
-                       fprintf(stderr,
-                               Name ": Operation not supported for %s metadata\n",
-                               st->ss->name);
+               if (verbose >= 0)
+                       pr_err("Operation not supported for %s metadata\n",
+                              st->ss->name);
                goto free_super;
        }
 
@@ -1126,18 +1260,17 @@ int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident
        rv = st->ss->update_subarray(st, subarray, update, ident);
 
        if (rv) {
-               if (!quiet)
-                       fprintf(stderr, Name ": Failed to update %s of subarray-%s in %s\n",
+               if (verbose >= 0)
+                       pr_err("Failed to update %s of subarray-%s in %s\n",
                                update, subarray, dev);
        } else if (st->update_tail)
                flush_metadata_updates(st);
        else
                st->ss->sync_metadata(st);
 
-       if (rv == 0 && strcmp(update, "name") == 0 && !quiet)
-               fprintf(stderr,
-                       Name ": Updated subarray-%s name from %s, UUIDs may have changed\n",
-                       subarray, dev);
+       if (rv == 0 && strcmp(update, "name") == 0 && verbose >= 0)
+               pr_err("Updated subarray-%s name from %s, UUIDs may have changed\n",
+                      subarray, dev);
 
  free_super:
        st->ss->free_super(st);
@@ -1146,9 +1279,8 @@ int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident
        return rv;
 }
 
-/* Move spare from one array to another
- * If adding to destination array fails
- * add back to original array
+/* Move spare from one array to another.  If adding to the destination array
+ * fails, add it back to the original array.
  * Returns 1 on success, 0 on failure */
 int move_spare(char *from_devname, char *to_devname, dev_t devid)
 {
@@ -1167,15 +1299,14 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
 
        devlist.next = NULL;
        devlist.used = 0;
-       devlist.re_add = 0;
        devlist.writemostly = 0;
        devlist.devname = devname;
        sprintf(devname, "%d:%d", major(devid), minor(devid));
 
        devlist.disposition = 'r';
-       if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL) == 0) {
+       if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0) == 0) {
                devlist.disposition = 'a';
-               if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL) == 0) {
+               if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL, 0) == 0) {
                        /* make sure manager is aware of changes */
                        ping_manager(to_devname);
                        ping_manager(from_devname);
@@ -1183,7 +1314,7 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
                        close(fd2);
                        return 1;
                }
-               else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL);
+               else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0);
        }
        close(fd1);
        close(fd2);