]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Incremental.c
mdadm.h: Introduced unaligned {get,put}_unaligned{16,32}()
[thirdparty/mdadm.git] / Incremental.c
index f576cbae35b1d8669b9bad2eb3937105caac9134..d4d3c353560d8fb4946c8fad1b0840a0de82e0dd 100644 (file)
@@ -2,7 +2,7 @@
  * Incremental.c - support --incremental.  Part of:
  * mdadm - manage Linux "md" devices aka RAID arrays.
  *
- * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2006-2013 Neil Brown <neilb@suse.de>
  *
  *
  *    This program is free software; you can redistribute it and/or modify
@@ -29,6 +29,7 @@
  */
 
 #include       "mdadm.h"
+#include       <sys/wait.h>
 #include       <dirent.h>
 #include       <ctype.h>
 
@@ -43,17 +44,10 @@ static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                     struct supertype *st, int verbose);
 
 static int Incremental_container(struct supertype *st, char *devname,
-                                char *homehost,
-                                int verbose, int runstop, int autof);
+                                struct context *c, char *only);
 
-static struct mddev_ident *search_mdstat(struct supertype *st,
-                                          struct mdinfo *info,
-                                          char *devname,
-                                          int verbose, int *rvp);
-
-int Incremental(char *devname, int verbose, int runstop,
-               struct supertype *st, char *homehost, int require_homehost,
-               int autof)
+int Incremental(struct mddev_dev *devlist, struct context *c,
+               struct supertype *st)
 {
        /* Add this device to an array, creating the array if necessary
         * and starting the array if sensible or - if runstop>0 - if possible.
@@ -92,11 +86,12 @@ int Incremental(char *devname, int verbose, int runstop,
         * - if number of OK devices match expected, or -R and there are enough,
         *   start the array (auto-readonly).
         */
-       struct stat stb;
+       dev_t rdev, rdev2;
        struct mdinfo info, dinfo;
        struct mdinfo *sra = NULL, *d;
        struct mddev_ident *match;
        char chosen_name[1024];
+       char *md_devname;
        int rv = 1;
        struct map_ent *mp, *map = NULL;
        int dfd = -1, mdfd = -1;
@@ -104,29 +99,20 @@ int Incremental(char *devname, int verbose, int runstop,
        int active_disks;
        int trustworthy;
        char *name_to_use;
-       mdu_array_info_t ainf;
        struct dev_policy *policy = NULL;
        struct map_ent target_array;
        int have_target;
+       char *devname = devlist->devname;
+       int journal_device_missing = 0;
 
        struct createinfo *ci = conf_get_create_info();
 
-       if (stat(devname, &stb) < 0) {
-               if (verbose >= 0)
-                       fprintf(stderr, Name ": stat failed for %s: %s.\n",
-                               devname, strerror(errno));
-               return rv;
-       }
-       if ((stb.st_mode & S_IFMT) != S_IFBLK) {
-               if (verbose >= 0)
-                       fprintf(stderr, Name ": %s is not a block device.\n",
-                               devname);
+       if (!stat_is_blkdev(devname, &rdev))
                return rv;
-       }
-       dfd = dev_open(devname, O_RDONLY|O_EXCL);
+       dfd = dev_open(devname, O_RDONLY);
        if (dfd < 0) {
-               if (verbose >= 0)
-                       fprintf(stderr, Name ": cannot open %s: %s.\n",
+               if (c->verbose >= 0)
+                       pr_err("cannot open %s: %s.\n",
                                devname, strerror(errno));
                return rv;
        }
@@ -138,65 +124,72 @@ int Incremental(char *devname, int verbose, int runstop,
                        rv = st->ss->load_container(st, dfd, NULL);
 
                close(dfd);
-               if (!rv && st->ss->container_content)
-                       return Incremental_container(st, devname, homehost,
-                                                    verbose, runstop, autof);
+               if (!rv && st->ss->container_content) {
+                       if (map_lock(&map))
+                               pr_err("failed to get exclusive lock on mapfile\n");
+                       if (c->export)
+                               printf("MD_DEVNAME=%s\n", devname);
+                       rv = Incremental_container(st, devname, c, NULL);
+                       map_unlock(&map);
+                       return rv;
+               }
 
-               fprintf(stderr, Name ": %s is not part of an md array.\n",
+               pr_err("%s is not part of an md array.\n",
                        devname);
                return rv;
        }
 
        /* 1/ Check if device is permitted by mdadm.conf */
 
-       if (!conf_test_dev(devname)) {
-               if (verbose >= 0)
-                       fprintf(stderr, Name
-                               ": %s not permitted by mdadm.conf.\n",
-                               devname);
+       for (;devlist; devlist = devlist->next)
+               if (conf_test_dev(devlist->devname))
+                       break;
+       if (!devlist) {
+               devlist = conf_get_devs();
+               for (;devlist; devlist = devlist->next) {
+                       if (stat_is_blkdev(devlist->devname, &rdev2) &&
+                           rdev2 == rdev)
+                               break;
+               }
+       }
+       if (!devlist) {
+               if (c->verbose >= 0)
+                       pr_err("%s not permitted by mdadm.conf.\n",
+                              devname);
                goto out;
        }
 
        /* 2/ Find metadata, reject if none appropriate (check
         *            version/name from args) */
 
-       if (fstat(dfd, &stb) < 0) {
-               if (verbose >= 0)
-                       fprintf(stderr, Name ": fstat failed for %s: %s.\n",
-                               devname, strerror(errno));
+       if (!fstat_is_blkdev(dfd, devname, &rdev))
                goto out;
-       }
-       if ((stb.st_mode & S_IFMT) != S_IFBLK) {
-               if (verbose >= 0)
-                       fprintf(stderr, Name ": %s is not a block device.\n",
-                               devname);
-               goto out;
-       }
 
-       dinfo.disk.major = major(stb.st_rdev);
-       dinfo.disk.minor = minor(stb.st_rdev);
+       dinfo.disk.major = major(rdev);
+       dinfo.disk.minor = minor(rdev);
 
        policy = disk_policy(&dinfo);
        have_target = policy_check_path(&dinfo, &target_array);
 
-       if (st == NULL && (st = guess_super(dfd)) == NULL) {
-               if (verbose >= 0)
-                       fprintf(stderr, Name
-                               ": no recognisable superblock on %s.\n",
-                               devname);
+       if (st == NULL && (st = guess_super_type(dfd, guess_array)) == NULL) {
+               if (c->verbose >= 0)
+                       pr_err("no recognisable superblock on %s.\n",
+                              devname);
                rv = try_spare(devname, &dfd, policy,
                               have_target ? &target_array : NULL,
-                              st, verbose);
+                              NULL, c->verbose);
                goto out;
        }
+       st->ignore_hw_compat = 0;
+
        if (st->ss->compare_super == NULL ||
-           st->ss->load_super(st, dfd, NULL)) {
-               if (verbose >= 0)
-                       fprintf(stderr, Name ": no RAID superblock on %s.\n",
+           st->ss->load_super(st, dfd, c->verbose >= 0 ? devname : NULL)) {
+               if (c->verbose >= 0)
+                       pr_err("no RAID superblock on %s.\n",
                                devname);
                rv = try_spare(devname, &dfd, policy,
                               have_target ? &target_array : NULL,
-                              st, verbose);
+                              st, c->verbose);
                free(st);
                goto out;
        }
@@ -205,15 +198,14 @@ int Incremental(char *devname, int verbose, int runstop,
        st->ss->getinfo_super(st, &info, NULL);
 
        /* 3/ Check if there is a match in mdadm.conf */
-       match = search_mdstat(st, &info, devname, verbose, &rv);
+       match = conf_match(st, &info, devname, c->verbose, &rv);
        if (!match && rv == 2)
                goto out;
 
-       if (match && match->devname
-           && strcasecmp(match->devname, "<ignore>") == 0) {
-               if (verbose >= 0)
-                       fprintf(stderr, Name ": array containing %s is explicitly"
-                               " ignored by mdadm.conf\n",
+       if (match && match->devname &&
+           strcasecmp(match->devname, "<ignore>") == 0) {
+               if (c->verbose >= 0)
+                       pr_err("array containing %s is explicitly ignored by mdadm.conf\n",
                                devname);
                goto out;
        }
@@ -224,21 +216,18 @@ int Incremental(char *devname, int verbose, int runstop,
         * on that. */
        if (match)
                trustworthy = LOCAL;
-       else if (st->ss->match_home(st, homehost) == 1)
+       else if (st->ss->match_home(st, c->homehost) == 1)
                trustworthy = LOCAL;
        else if (st->ss->match_home(st, "any") == 1)
                trustworthy = LOCAL_ANY;
        else
                trustworthy = FOREIGN;
 
-
        if (!match && !conf_test_metadata(st->ss->name, policy,
                                          (trustworthy == LOCAL))) {
-               if (verbose >= 1)
-                       fprintf(stderr, Name
-                               ": %s has metadata type %s for which "
-                               "auto-assembly is disabled\n",
-                               devname, st->ss->name);
+               if (c->verbose >= 1)
+                       pr_err("%s has metadata type %s for which auto-assembly is disabled\n",
+                              devname, st->ss->name);
                goto out;
        }
        if (trustworthy == LOCAL_ANY)
@@ -250,19 +239,17 @@ int Incremental(char *devname, int verbose, int runstop,
         * CREATE.
         */
        if (match && match->autof)
-               autof = match->autof;
-       if (autof == 0)
-               autof = ci->autof;
+               c->autof = match->autof;
+       if (c->autof == 0)
+               c->autof = ci->autof;
 
        name_to_use = info.name;
-       if (name_to_use[0] == 0 &&
-           info.array.level == LEVEL_CONTAINER &&
-           trustworthy == LOCAL) {
+       if (name_to_use[0] == 0 && info.array.level == LEVEL_CONTAINER) {
                name_to_use = info.text_version;
                trustworthy = METADATA;
        }
        if (name_to_use[0] && trustworthy != LOCAL &&
-           ! require_homehost &&
+           ! c->require_homehost &&
            conf_name_is_free(name_to_use))
                trustworthy = LOCAL;
 
@@ -275,61 +262,86 @@ int Incremental(char *devname, int verbose, int runstop,
        /* 4/ Check if array exists.
         */
        if (map_lock(&map))
-               fprintf(stderr, Name ": failed to get exclusive lock on "
-                       "mapfile\n");
+               pr_err("failed to get exclusive lock on mapfile\n");
+       /* Now check we can get O_EXCL.  If not, probably "mdadm -A" has
+        * taken over
+        */
+       dfd = dev_open(devname, O_RDONLY|O_EXCL);
+       if (dfd < 0) {
+               if (c->verbose >= 0)
+                       pr_err("cannot reopen %s: %s.\n",
+                               devname, strerror(errno));
+               goto out_unlock;
+       }
+       /* Cannot hold it open while we add the device to the array,
+        * so we must release the O_EXCL and depend on the map_lock()
+        * So now is the best time to remove any partitions.
+        */
+       remove_partitions(dfd);
+       close(dfd);
+       dfd = -1;
+
        mp = map_by_uuid(&map, info.uuid);
        if (mp)
-               mdfd = open_dev(mp->devnum);
+               mdfd = open_dev(mp->devnm);
        else
                mdfd = -1;
 
        if (mdfd < 0) {
 
+               /* Skip the clustered ones. This should be started by
+                * clustering resource agents
+                */
+               if (info.array.state & (1 << MD_SB_CLUSTERED))
+                       goto out;
+
                /* Couldn't find an existing array, maybe make a new one */
                mdfd = create_mddev(match ? match->devname : NULL,
-                                   name_to_use, autof, trustworthy, chosen_name);
+                                   name_to_use, c->autof, trustworthy, chosen_name, 0);
 
                if (mdfd < 0)
-                       goto out;
+                       goto out_unlock;
 
-               sysfs_init(&info, mdfd, 0);
+               if (sysfs_init(&info, mdfd, NULL)) {
+                       pr_err("unable to initialize sysfs for %s\n",
+                              chosen_name);
+                       rv = 2;
+                       goto out_unlock;
+               }
 
                if (set_array_info(mdfd, st, &info) != 0) {
-                       fprintf(stderr, Name ": failed to set array info for %s: %s\n",
+                       pr_err("failed to set array info for %s: %s\n",
                                chosen_name, strerror(errno));
                        rv = 2;
-                       goto out;
+                       goto out_unlock;
                }
 
                dinfo = info;
-               dinfo.disk.major = major(stb.st_rdev);
-               dinfo.disk.minor = minor(stb.st_rdev);
+               dinfo.disk.major = major(rdev);
+               dinfo.disk.minor = minor(rdev);
                if (add_disk(mdfd, st, &info, &dinfo) != 0) {
-                       fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
+                       pr_err("failed to add %s to new array %s: %s.\n",
                                devname, chosen_name, strerror(errno));
                        ioctl(mdfd, STOP_ARRAY, 0);
                        rv = 2;
-                       goto out;
+                       goto out_unlock;
                }
-               sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE |
-                                           GET_OFFSET | GET_SIZE));
-       
+               sra = sysfs_read(mdfd, NULL, (GET_DEVS | GET_STATE |
+                                             GET_OFFSET | GET_SIZE));
+
                if (!sra || !sra->devs || sra->devs->disk.raid_disk >= 0) {
                        /* It really should be 'none' - must be old buggy
                         * kernel, and mdadm -I may not be able to complete.
                         * So reject it.
                         */
                        ioctl(mdfd, STOP_ARRAY, NULL);
-                       fprintf(stderr, Name
-                     ": You have an old buggy kernel which cannot support\n"
-                               "      --incremental reliably.  Aborting.\n");
-                       sysfs_free(sra);
+                       pr_err("You have an old buggy kernel which cannot support\n      --incremental reliably.  Aborting.\n");
                        rv = 2;
-                       goto out;
+                       goto out_unlock;
                }
                info.array.working_disks = 1;
                /* 6/ Make sure /var/run/mdadm.map contains this array. */
-               map_update(&map, fd2devnum(mdfd),
+               map_update(&map, fd2devnm(mdfd),
                           info.text_version,
                           info.uuid, chosen_name);
        } else {
@@ -343,13 +355,13 @@ int Incremental(char *devname, int verbose, int runstop,
                struct supertype *st2;
                struct mdinfo info2, *d;
 
-               sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE |
+               sra = sysfs_read(mdfd, NULL, (GET_DEVS | GET_STATE |
                                            GET_OFFSET | GET_SIZE));
-       
+
                if (mp->path)
                        strcpy(chosen_name, mp->path);
                else
-                       strcpy(chosen_name, devnum2devname(mp->devnum));
+                       strcpy(chosen_name, mp->devnm);
 
                /* It is generally not OK to add non-spare drives to a
                 * running array as they are probably missing because
@@ -362,37 +374,38 @@ int Incremental(char *devname, int verbose, int runstop,
                 * flag has a different meaning.  The test has to happen
                 * at the device level there
                 */
-               if (!st->ss->external
-                   && (info.disk.state & (1<<MD_DISK_SYNC)) != 0
-                   && ! policy_action_allows(policy, st->ss->name,
-                                             act_re_add)
-                   && runstop < 1) {
-                       if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) {
-                               fprintf(stderr, Name
-                                       ": not adding %s to active array (without --run) %s\n",
-                                       devname, chosen_name);
+               if (!st->ss->external &&
+                   (info.disk.state & (1 << MD_DISK_SYNC)) != 0 &&
+                   !policy_action_allows(policy, st->ss->name, act_re_add) &&
+                   c->runstop < 1) {
+                       if (md_array_active(mdfd)) {
+                               pr_err("not adding %s to active array (without --run) %s\n",
+                                      devname, chosen_name);
                                rv = 2;
-                               goto out;
+                               goto out_unlock;
                        }
                }
                if (!sra) {
                        rv = 2;
-                       goto out;
+                       goto out_unlock;
                }
                if (sra->devs) {
                        sprintf(dn, "%d:%d", sra->devs->disk.major,
                                sra->devs->disk.minor);
                        dfd2 = dev_open(dn, O_RDONLY);
+                       if (dfd2 < 0) {
+                               pr_err("unable to open %s\n", devname);
+                               rv = 2;
+                               goto out_unlock;
+                       }
                        st2 = dup_super(st);
                        if (st2->ss->load_super(st2, dfd2, NULL) ||
                            st->ss->compare_super(st, st2) != 0) {
-                               fprintf(stderr, Name
-                                       ": metadata mismatch between %s and "
-                                       "chosen array %s\n",
-                                       devname, chosen_name);
+                               pr_err("metadata mismatch between %s and chosen array %s\n",
+                                      devname, chosen_name);
                                close(dfd2);
                                rv = 2;
-                               goto out;
+                               goto out_unlock;
                        }
                        close(dfd2);
                        st2->ss->getinfo_super(st2, &info2, NULL);
@@ -400,66 +413,87 @@ int Incremental(char *devname, int verbose, int runstop,
                        if (info.array.level != info2.array.level ||
                            memcmp(info.uuid, info2.uuid, 16) != 0 ||
                            info.array.raid_disks != info2.array.raid_disks) {
-                               fprintf(stderr, Name
-                                       ": unexpected difference between %s and %s.\n",
-                                       chosen_name, devname);
+                               pr_err("unexpected difference between %s and %s.\n",
+                                      chosen_name, devname);
                                rv = 2;
-                               goto out;
+                               goto out_unlock;
                        }
                }
-               info2.disk.major = major(stb.st_rdev);
-               info2.disk.minor = minor(stb.st_rdev);
+               info.disk.major = major(rdev);
+               info.disk.minor = minor(rdev);
                /* add disk needs to know about containers */
                if (st->ss->external)
                        sra->array.level = LEVEL_CONTAINER;
-               err = add_disk(mdfd, st, sra, &info2);
+
+               if (info.array.state & (1 << MD_SB_CLUSTERED))
+                       info.disk.state |= (1 << MD_DISK_CLUSTER_ADD);
+
+               err = add_disk(mdfd, st, sra, &info);
                if (err < 0 && errno == EBUSY) {
                        /* could be another device present with the same
                         * disk.number. Find and reject any such
                         */
                        find_reject(mdfd, st, sra, info.disk.number,
-                                   info.events, verbose, chosen_name);
-                       err = add_disk(mdfd, st, sra, &info2);
+                                   info.events, c->verbose, chosen_name);
+                       err = add_disk(mdfd, st, sra, &info);
+               }
+               if (err < 0 && errno == EINVAL &&
+                   info.disk.state & (1<<MD_DISK_SYNC)) {
+                       /* Maybe it needs to be added as a spare */
+                       if (policy_action_allows(policy, st->ss->name,
+                                                act_force_spare)) {
+                               info.disk.state &= ~(1<<MD_DISK_SYNC);
+                               err = add_disk(mdfd, st, sra, &info);
+                       } else
+                               if (c->verbose >= 0)
+                                       pr_err("can only add %s to %s as a spare, and force-spare is not set.\n",
+                                              devname, chosen_name);
                }
                if (err < 0) {
-                       fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
+                       pr_err("failed to add %s to existing array %s: %s.\n",
                                devname, chosen_name, strerror(errno));
                        rv = 2;
-                       goto out;
+                       goto out_unlock;
                }
                info.array.working_disks = 0;
                for (d = sra->devs; d; d=d->next)
                        info.array.working_disks ++;
-                       
+
+       }
+       if (strncmp(chosen_name, "/dev/md/", 8) == 0)
+               md_devname = chosen_name+8;
+       else
+               md_devname = chosen_name;
+       if (c->export) {
+               printf("MD_DEVICE=%s\n", fd2devnm(mdfd));
+               printf("MD_DEVNAME=%s\n", md_devname);
+               printf("MD_FOREIGN=%s\n", trustworthy == FOREIGN ? "yes" : "no");
        }
 
        /* 7/ Is there enough devices to possibly start the array? */
        /* 7a/ if not, finish with success. */
        if (info.array.level == LEVEL_CONTAINER) {
-               int devnum = devnum; /* defined and used iff ->external */
+               char devnm[32];
                /* Try to assemble within the container */
-               map_unlock(&map);
-               sysfs_uevent(&info, "change");
-               if (verbose >= 0)
-                       fprintf(stderr, Name
-                               ": container %s now has %d devices\n",
-                               chosen_name, info.array.working_disks);
+               sysfs_uevent(sra, "change");
+               if (!c->export && c->verbose >= 0)
+                       pr_err("container %s now has %d device%s\n",
+                              chosen_name, info.array.working_disks,
+                              info.array.working_disks == 1?"":"s");
                wait_for(chosen_name, mdfd);
                if (st->ss->external)
-                       devnum = fd2devnum(mdfd);
+                       strcpy(devnm, fd2devnm(mdfd));
+               if (st->ss->load_container)
+                       rv = st->ss->load_container(st, mdfd, NULL);
                close(mdfd);
                sysfs_free(sra);
-               rv = Incremental(chosen_name, verbose, runstop,
-                                NULL, homehost, require_homehost, autof);
-               if (rv == 1)
-                       /* Don't fail the whole -I if a subarray didn't
-                        * have enough devices to start yet
-                        */
-                       rv = 0;
+               if (!rv)
+                       rv = Incremental_container(st, chosen_name, c, NULL);
+               map_unlock(&map);
                /* after spare is added, ping monitor for external metadata
                 * so that it can eg. try to rebuild degraded array */
                if (st->ss->external)
-                       ping_monitor_by_id(devnum);
+                       ping_monitor(devnm);
                return rv;
        }
 
@@ -468,19 +502,24 @@ int Incremental(char *devname, int verbose, int runstop,
         * things change.
         */
        sysfs_free(sra);
-       sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE |
+       sra = sysfs_read(mdfd, NULL, (GET_DEVS | GET_STATE |
                                    GET_OFFSET | GET_SIZE));
        active_disks = count_active(st, sra, mdfd, &avail, &info);
+
+       journal_device_missing = (info.journal_device_required) && (info.journal_clean == 0);
+
+       if (info.consistency_policy == CONSISTENCY_POLICY_PPL)
+               info.array.state |= 1;
+
        if (enough(info.array.level, info.array.raid_disks,
-                  info.array.layout, info.array.state & 1,
-                  avail, active_disks) == 0) {
-               if (verbose >= 0)
-                       fprintf(stderr, Name
-                            ": %s attached to %s, not enough to start (%d).\n",
-                               devname, chosen_name, active_disks);
-               map_unlock(&map);
+                  info.array.layout, info.array.state & 1, avail) == 0) {
+               if (c->export) {
+                       printf("MD_STARTED=no\n");
+               } else if (c->verbose >= 0)
+                       pr_err("%s attached to %s, not enough to start (%d).\n",
+                              devname, chosen_name, active_disks);
                rv = 0;
-               goto out;
+               goto out_unlock;
        }
 
        /* 7b/ if yes, */
@@ -489,33 +528,40 @@ int Incremental(char *devname, int verbose, int runstop,
        /*   + add any bitmap file  */
        /*   + start the array (auto-readonly). */
 
-       if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) {
-               if (verbose >= 0)
-                       fprintf(stderr, Name
-                          ": %s attached to %s which is already active.\n",
-                               devname, chosen_name);
-               map_unlock(&map);
+       if (md_array_active(mdfd)) {
+               if (c->export) {
+                       printf("MD_STARTED=already\n");
+               } else if (c->verbose >= 0)
+                       pr_err("%s attached to %s which is already active.\n",
+                              devname, chosen_name);
                rv = 0;
-               goto out;
+               goto out_unlock;
        }
 
        map_unlock(&map);
-       if (runstop > 0 || active_disks >= info.array.working_disks) {
+       if (c->runstop > 0 || (!journal_device_missing && active_disks >= info.array.working_disks)) {
                struct mdinfo *dsk;
                /* Let's try to start it */
+
+               if (journal_device_missing)
+                       pr_err("Trying to run with missing journal device\n");
+               if (info.reshape_active && !(info.reshape_active & RESHAPE_NO_BACKUP)) {
+                       pr_err("%s: This array is being reshaped and cannot be started\n",
+                              chosen_name);
+                       cont_err("by --incremental.  Please use --assemble\n");
+                       goto out;
+               }
                if (match && match->bitmap_file) {
                        int bmfd = open(match->bitmap_file, O_RDWR);
                        if (bmfd < 0) {
-                               fprintf(stderr, Name
-                                       ": Could not open bitmap file %s.\n",
-                                       match->bitmap_file);
+                               pr_err("Could not open bitmap file %s.\n",
+                                      match->bitmap_file);
                                goto out;
                        }
                        if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) {
                                close(bmfd);
-                               fprintf(stderr, Name
-                                       ": Failed to set bitmapfile for %s.\n",
-                                       chosen_name);
+                               pr_err("Failed to set bitmapfile for %s.\n",
+                                      chosen_name);
                                goto out;
                        }
                        close(bmfd);
@@ -527,17 +573,22 @@ int Incremental(char *devname, int verbose, int runstop,
                        if (d->disk.state & (1<<MD_DISK_REMOVED))
                                remove_disk(mdfd, st, sra, d);
 
-               if ((sra == NULL || active_disks >= info.array.working_disks)
-                   && trustworthy != FOREIGN)
+               if ((sra == NULL || active_disks >= info.array.working_disks) &&
+                   trustworthy != FOREIGN)
                        rv = ioctl(mdfd, RUN_ARRAY, NULL);
                else
                        rv = sysfs_set_str(sra, NULL,
                                           "array_state", "read-auto");
+               /* Array might be O_EXCL which  will interfere with
+                * fsck and mount.  So re-open without O_EXCL.
+                */
+               reopen_mddev(mdfd);
                if (rv == 0) {
-                       if (verbose >= 0)
-                               fprintf(stderr, Name
-                                       ": %s attached to %s, which has been started.\n",
-                                       devname, chosen_name);
+                       if (c->export) {
+                               printf("MD_STARTED=yes\n");
+                       } else if (c->verbose >= 0)
+                               pr_err("%s attached to %s, which has been started.\n",
+                                      devname, chosen_name);
                        rv = 0;
                        wait_for(chosen_name, mdfd);
                        /* We just started the array, so some devices
@@ -547,23 +598,25 @@ int Incremental(char *devname, int verbose, int runstop,
                         * those devices we should re-add them now.
                         */
                        for (dsk = sra->devs; dsk ; dsk = dsk->next) {
-                               if (disk_action_allows(dsk, st->ss->name, act_re_add) &&
+                               if (disk_action_allows(dsk, st->ss->name,
+                                                      act_re_add) &&
                                    add_disk(mdfd, st, sra, dsk) == 0)
-                                       fprintf(stderr, Name
-                                               ": %s re-added to %s\n",
-                                               dsk->sys_name, chosen_name);
+                                       pr_err("%s re-added to %s\n",
+                                              dsk->sys_name, chosen_name);
                        }
                } else {
-                       fprintf(stderr, Name
-                             ": %s attached to %s, but failed to start: %s.\n",
-                               devname, chosen_name, strerror(errno));
+                       pr_err("%s attached to %s, but failed to start: %s.\n",
+                              devname, chosen_name, strerror(errno));
                        rv = 1;
                }
        } else {
-               if (verbose >= 0)
-                       fprintf(stderr, Name
-                          ": %s attached to %s, not enough to start safely.\n",
-                               devname, chosen_name);
+               if (c->export) {
+                       printf("MD_STARTED=unsafe\n");
+               } else if (journal_device_missing) {
+                       pr_err("Journal device is missing, not safe to start yet.\n");
+               } else if (c->verbose >= 0)
+                       pr_err("%s attached to %s, not enough to start safely.\n",
+                              devname, chosen_name);
                rv = 0;
        }
 out:
@@ -574,82 +627,11 @@ out:
                close(mdfd);
        if (policy)
                dev_policy_free(policy);
-       if (sra)
-               sysfs_free(sra);
+       sysfs_free(sra);
        return rv;
-}
-
-static struct mddev_ident *search_mdstat(struct supertype *st,
-                                          struct mdinfo *info,
-                                          char *devname,
-                                          int verbose, int *rvp)
-{
-       struct mddev_ident *array_list, *match;
-       array_list = conf_get_ident(NULL);
-       match = NULL;
-       for (; array_list; array_list = array_list->next) {
-               if (array_list->uuid_set &&
-                   same_uuid(array_list->uuid, info->uuid, st->ss->swapuuid)
-                   == 0) {
-                       if (verbose >= 2 && array_list->devname)
-                               fprintf(stderr, Name
-                                       ": UUID differs from %s.\n",
-                                       array_list->devname);
-                       continue;
-               }
-               if (array_list->name[0] &&
-                   strcasecmp(array_list->name, info->name) != 0) {
-                       if (verbose >= 2 && array_list->devname)
-                               fprintf(stderr, Name
-                                       ": Name differs from %s.\n",
-                                       array_list->devname);
-                       continue;
-               }
-               if (array_list->devices &&
-                   !match_oneof(array_list->devices, devname)) {
-                       if (verbose >= 2 && array_list->devname)
-                               fprintf(stderr, Name
-                                       ": Not a listed device for %s.\n",
-                                       array_list->devname);
-                       continue;
-               }
-               if (array_list->super_minor != UnSet &&
-                   array_list->super_minor != info->array.md_minor) {
-                       if (verbose >= 2 && array_list->devname)
-                               fprintf(stderr, Name
-                                       ": Different super-minor to %s.\n",
-                                       array_list->devname);
-                       continue;
-               }
-               if (!array_list->uuid_set &&
-                   !array_list->name[0] &&
-                   !array_list->devices &&
-                   array_list->super_minor == UnSet) {
-                       if (verbose >= 2 && array_list->devname)
-                               fprintf(stderr, Name
-                                       ": %s doesn't have any identifying information.\n",
-                                       array_list->devname);
-                       continue;
-               }
-               /* FIXME, should I check raid_disks and level too?? */
-
-               if (match) {
-                       if (verbose >= 0) {
-                               if (match->devname && array_list->devname)
-                                       fprintf(stderr, Name
-                                               ": we match both %s and %s - cannot decide which to use.\n",
-                                               match->devname, array_list->devname);
-                               else
-                                       fprintf(stderr, Name
-                                               ": multiple lines in mdadm.conf match\n");
-                       }
-                       *rvp = 2;
-                       match = NULL;
-                       break;
-               }
-               match = array_list;
-       }
-       return match;
+out_unlock:
+       map_unlock(&map);
+       goto out;
 }
 
 static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
@@ -660,14 +642,13 @@ static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
         * and events less than the passed events, and remove the device.
         */
        struct mdinfo *d;
-       mdu_array_info_t ra;
 
-       if (ioctl(mdfd, GET_ARRAY_INFO, &ra) == 0)
+       if (md_array_active(mdfd))
                return; /* not safe to remove from active arrays
                         * without thinking more */
 
        for (d = sra->devs; d ; d = d->next) {
-               char dn[10];
+               char dn[24]; // 2*11 bytes for ints (including sign) + colon + null byte
                int dfd;
                struct mdinfo info;
                sprintf(dn, "%d:%d", d->disk.major, d->disk.minor);
@@ -682,17 +663,15 @@ static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
                st->ss->free_super(st);
                close(dfd);
 
-               if (info.disk.number != number ||
-                   info.events >= events)
+               if (info.disk.number != number || info.events >= events)
                        continue;
 
                if (d->disk.raid_disk > -1)
                        sysfs_set_str(sra, d, "slot", "none");
                if (sysfs_set_str(sra, d, "state", "remove") == 0)
                        if (verbose >= 0)
-                               fprintf(stderr, Name
-                                       ": removing old device %s from %s\n",
-                                       d->sys_name+4, array_name);
+                               pr_err("removing old device %s from %s\n",
+                                      d->sys_name+4, array_name);
        }
 }
 
@@ -703,9 +682,11 @@ static int count_active(struct supertype *st, struct mdinfo *sra,
        /* count how many devices in sra think they are active */
        struct mdinfo *d;
        int cnt = 0;
+       int replcnt = 0;
        __u64 max_events = 0;
+       __u64 max_journal_events = 0;
        char *avail = NULL;
-       int *best;
+       int *best = NULL;
        char *devmap = NULL;
        int numdevs = 0;
        int devnum;
@@ -717,7 +698,7 @@ static int count_active(struct supertype *st, struct mdinfo *sra,
 
        for (d = sra->devs ; d ; d = d->next)
                numdevs++;
-       for (d = sra->devs, devnum=0 ; d ; d = d->next, devnum++) {
+       for (d = sra->devs, devnum = 0 ; d ; d = d->next, devnum++) {
                char dn[30];
                int dfd;
                int ok;
@@ -731,19 +712,19 @@ static int count_active(struct supertype *st, struct mdinfo *sra,
                close(dfd);
                if (ok != 0)
                        continue;
+
                info.array.raid_disks = raid_disks;
                st->ss->getinfo_super(st, &info, devmap + raid_disks * devnum);
+               if (info.disk.raid_disk == MD_DISK_ROLE_JOURNAL &&
+                   info.events > max_journal_events)
+                       max_journal_events = info.events;
                if (!avail) {
                        raid_disks = info.array.raid_disks;
-                       avail = calloc(raid_disks, 1);
-                       if (!avail) {
-                               fprintf(stderr, Name ": out of memory.\n");
-                               exit(1);
-                       }
+                       avail = xcalloc(raid_disks, 1);
                        *availp = avail;
 
-                       best = calloc(raid_disks, sizeof(int));
-                       devmap = calloc(raid_disks * numdevs, 1);
+                       best = xcalloc(raid_disks, sizeof(int));
+                       devmap = xcalloc(raid_disks, numdevs);
 
                        st->ss->getinfo_super(st, &info, devmap);
                }
@@ -769,21 +750,26 @@ static int count_active(struct supertype *st, struct mdinfo *sra,
                        else if (info.events == max_events+1) {
                                int i;
                                max_events = info.events;
-                               for (i=0; i < raid_disks; i++)
+                               for (i = 0; i < raid_disks; i++)
                                        if (avail[i])
                                                avail[i]--;
                                avail[info.disk.raid_disk] = 2;
                                best[info.disk.raid_disk] = devnum;
                                st->ss->getinfo_super(st, bestinfo, NULL);
                        } else { /* info.events much bigger */
-                               memset(avail, 0, info.disk.raid_disk);
+                               memset(avail, 0, raid_disks);
                                max_events = info.events;
                                avail[info.disk.raid_disk] = 2;
+                               best[info.disk.raid_disk] = devnum;
                                st->ss->getinfo_super(st, bestinfo, NULL);
                        }
-               }
+               } else if (info.disk.state & (1<<MD_DISK_REPLACEMENT))
+                       replcnt++;
                st->ss->free_super(st);
        }
+       if (max_journal_events >= max_events - 1)
+               bestinfo->journal_clean = 1;
+
        if (!avail)
                return 0;
        /* We need to reject any device that thinks the best device is
@@ -806,35 +792,42 @@ static int count_active(struct supertype *st, struct mdinfo *sra,
                if (avail[i])
                        cnt++;
        }
+       /* Also need to reject any spare device with an event count that
+        * is too high
+        */
+       for (d = sra->devs; d; d = d->next) {
+               if (!(d->disk.state & (1<<MD_DISK_SYNC)) &&
+                   d->events > max_events)
+                       d->disk.state |= (1 << MD_DISK_REMOVED);
+       }
        free(best);
        free(devmap);
-       return cnt;
+       return cnt + replcnt;
 }
 
 /* test if container has degraded member(s) */
-static int container_members_max_degradation(struct map_ent *map, struct map_ent *me)
+static int
+container_members_max_degradation(struct map_ent *map, struct map_ent *me)
 {
-       mdu_array_info_t array;
-       int afd;
-       int max_degraded = 0;
+       struct mdinfo *sra;
+       int degraded, max_degraded = 0;
 
        for(; map; map = map->next) {
-               if (!is_subarray(map->metadata) ||
-                   devname2devnum(map->metadata+1) != me->devnum)
-                       continue;
-               afd = open_dev(map->devnum);
-               if (afd < 0)
+               if (!metadata_container_matches(map->metadata, me->devnm))
                        continue;
                /* most accurate information regarding array degradation */
-               if (ioctl(afd, GET_ARRAY_INFO, &array) >= 0) {
-                       int degraded = array.raid_disks - array.active_disks -
-                                      array.spare_disks;
-                       if (degraded > max_degraded)
-                               max_degraded = degraded;
-               }
-               close(afd);
+               sra = sysfs_read(-1, map->devnm,
+                                GET_DISKS | GET_DEVS | GET_STATE);
+               if (!sra)
+                       continue;
+               degraded = sra->array.raid_disks - sra->array.active_disks -
+                       sra->array.spare_disks;
+               if (degraded > max_degraded)
+                       max_degraded = degraded;
+               sysfs_free(sra);
        }
-       return (max_degraded);
+
+       return max_degraded;
 }
 
 static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
@@ -852,12 +845,12 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
         * Return 0 on success, or some exit code on failure, probably 1.
         */
        int rv = 1;
-       struct stat stb;
+       dev_t rdev;
        struct map_ent *mp, *map = NULL;
        struct mdinfo *chosen = NULL;
        int dfd = *dfdp;
 
-       if (fstat(dfd, &stb) != 0)
+       if (!fstat_is_blkdev(dfd, devname, &rdev))
                return 1;
 
        /*
@@ -870,16 +863,15 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
         */
 
        if (map_lock(&map)) {
-               fprintf(stderr, Name ": failed to get exclusive lock on "
-                       "mapfile\n");
+               pr_err("failed to get exclusive lock on mapfile\n");
                return 1;
        }
        for (mp = map ; mp ; mp = mp->next) {
                struct supertype *st2;
                struct domainlist *dl = NULL;
                struct mdinfo *sra;
-               unsigned long long devsize;
-               unsigned long long component_size = 0;
+               unsigned long long devsize, freesize = 0;
+               struct spare_criteria sc = {0, 0};
 
                if (is_subarray(mp->metadata))
                        continue;
@@ -889,36 +881,29 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                            (st->minor_version >= 0 &&
                             st->minor_version != st2->minor_version)) {
                                if (verbose > 1)
-                                       fprintf(stderr, Name ": not adding %s to %s as metadata type doesn't match\n",
+                                       pr_err("not adding %s to %s as metadata type doesn't match\n",
                                                devname, mp->path);
                                free(st2);
                                continue;
                        }
                        free(st2);
                }
-               sra = sysfs_read(-1, mp->devnum,
+               sra = sysfs_read(-1, mp->devnm,
                                 GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
-                                GET_DEGRADED|GET_COMPONENT|GET_VERSION);
-               if (!sra) {
-                       /* Probably a container - no degraded info */
-                       sra = sysfs_read(-1, mp->devnum,
-                                        GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
-                                        GET_COMPONENT|GET_VERSION);
-                       if (sra)
-                               sra->array.failed_disks = -1;
-               }
-               if (!sra)
+                                GET_COMPONENT|GET_VERSION);
+               if (sra)
+                       sra->array.failed_disks = -1;
+               else
                        continue;
                if (st == NULL) {
                        int i;
                        st2 = NULL;
-                       for(i=0; !st2 && superlist[i]; i++)
+                       for(i = 0; !st2 && superlist[i]; i++)
                                st2 = superlist[i]->match_metadata_desc(
                                        sra->text_version);
                        if (!st2) {
                                if (verbose > 1)
-                                       fprintf(stderr, Name ": not adding %s to %s"
-                                               " as metadata not recognised.\n",
+                                       pr_err("not adding %s to %s as metadata not recognised.\n",
                                                devname, mp->path);
                                goto next;
                        }
@@ -942,23 +927,31 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                        /* true for containers, here we must read superblock
                         * to obtain minimum spare size */
                        struct supertype *st3 = dup_super(st2);
-                       int mdfd = open_dev(mp->devnum);
-                       if (!mdfd)
+                       int mdfd = open_dev(mp->devnm);
+                       if (mdfd < 0) {
+                               free(st3);
                                goto next;
+                       }
                        if (st3->ss->load_container &&
                            !st3->ss->load_container(st3, mdfd, mp->path)) {
-                               component_size = st3->ss->min_acceptable_spare_size(st3);
+                               if (st3->ss->get_spare_criteria)
+                                       st3->ss->get_spare_criteria(st3, &sc);
                                st3->ss->free_super(st3);
                        }
                        free(st3);
                        close(mdfd);
                }
                if ((sra->component_size > 0 &&
-                    st2->ss->avail_size(st2, devsize) < sra->component_size)
-                   ||
-                   (sra->component_size == 0 && devsize < component_size)) {
+                    st2->ss->validate_geometry(st2, sra->array.level, sra->array.layout,
+                                               sra->array.raid_disks, &sra->array.chunk_size,
+                                               sra->component_size,
+                                               sra->devs ? sra->devs->data_offset : INVALID_SECTORS,
+                                               devname, &freesize, sra->consistency_policy,
+                                               0) &&
+                    freesize < sra->component_size) ||
+                   (sra->component_size == 0 && devsize < sc.min_size)) {
                        if (verbose > 1)
-                               fprintf(stderr, Name ": not adding %s to %s as it is too small\n",
+                               pr_err("not adding %s to %s as it is too small\n",
                                        devname, mp->path);
                        goto next;
                }
@@ -976,8 +969,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                                   sizeof(target->uuid)) == 0 &&
                            sra->array.failed_disks > 0) {
                                /* This is our target!! */
-                               if (chosen)
-                                       sysfs_free(chosen);
+                               sysfs_free(chosen);
                                chosen = sra;
                                sra = NULL;
                                /* skip to end so we don't check any more */
@@ -994,8 +986,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                if (domain_test(dl, pol, st2->ss->name) != 1) {
                        /* domain test fails */
                        if (verbose > 1)
-                               fprintf(stderr, Name ": not adding %s to %s as"
-                                       " it is not in a compatible domain\n",
+                               pr_err("not adding %s to %s as it is not in a compatible domain\n",
                                        devname, mp->path);
 
                        goto next;
@@ -1010,8 +1001,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                        sra = NULL;
                }
        next:
-               if (sra)
-                       sysfs_free(sra);
+               sysfs_free(sra);
                if (st != st2)
                        free(st2);
                if (dl)
@@ -1019,34 +1009,35 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
        }
        if (chosen) {
                /* add current device to chosen array as a spare */
-               int mdfd = open_dev(devname2devnum(chosen->sys_name));
+               int mdfd = open_dev(chosen->sys_name);
                if (mdfd >= 0) {
                        struct mddev_dev devlist;
-                       char devname[20];
+                       char chosen_devname[24]; // 2*11 for int (including signs) + colon + null
                        devlist.next = NULL;
                        devlist.used = 0;
-                       devlist.re_add = 0;
-                       devlist.writemostly = 0;
-                       devlist.devname = devname;
-                       sprintf(devname, "%d:%d", major(stb.st_rdev),
-                               minor(stb.st_rdev));
+                       devlist.writemostly = FlagDefault;
+                       devlist.failfast = FlagDefault;
+                       devlist.devname = chosen_devname;
+                       sprintf(chosen_devname, "%d:%d", major(rdev),
+                               minor(rdev));
                        devlist.disposition = 'a';
                        close(dfd);
                        *dfdp = -1;
                        rv =  Manage_subdevs(chosen->sys_name, mdfd, &devlist,
-                                            -1, 0, NULL);
+                                            -1, 0, NULL, 0);
                        close(mdfd);
                }
                if (verbose > 0) {
                        if (rv == 0)
-                               fprintf(stderr, Name ": added %s as spare for %s\n",
+                               pr_err("added %s as spare for %s\n",
                                        devname, chosen->sys_name);
                        else
-                               fprintf(stderr, Name ": failed to add %s as spare for %s\n",
+                               pr_err("failed to add %s as spare for %s\n",
                                        devname, chosen->sys_name);
                }
                sysfs_free(chosen);
        }
+       map_unlock(&map);
        return rv;
 }
 
@@ -1089,9 +1080,9 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                struct supertype *st2 = NULL;
                char *devname = NULL;
                unsigned long long devsectors;
+               char *pathlist[2];
 
-               if (de->d_ino == 0 ||
-                   de->d_name[0] == '.' ||
+               if (de->d_ino == 0 || de->d_name[0] == '.' ||
                    (de->d_type != DT_LNK && de->d_type != DT_UNKNOWN))
                        goto next;
 
@@ -1104,7 +1095,9 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                        /* This is a partition - skip it */
                        goto next;
 
-               pol2 = path_policy(de->d_name, type_disk);
+               pathlist[0] = de->d_name;
+               pathlist[1] = NULL;
+               pol2 = path_policy(pathlist, type_disk);
 
                domain_merge(&domlist, pol2, st ? st->ss->name : NULL);
                if (domain_test(domlist, pol, st ? st->ss->name : NULL) != 1)
@@ -1129,9 +1122,9 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                        st2 = dup_super(st);
                else
                        st2 = guess_super_type(fd, guess_partitions);
-               if (st2 == NULL ||
-                   st2->ss->load_super(st2, fd, NULL) < 0)
+               if (st2 == NULL || st2->ss->load_super(st2, fd, NULL) < 0)
                        goto next;
+               st2->ignore_hw_compat = 0;
 
                if (!st) {
                        /* Check domain policy again, this time referring to metadata */
@@ -1157,8 +1150,7 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                 * metadata which makes better use of the device can
                 * be found.
                 */
-               if (chosen == NULL ||
-                   chosen_size < info.component_size) {
+               if (chosen == NULL || chosen_size < info.component_size) {
                        chosen_size = info.component_size;
                        free(chosen);
                        chosen = devname;
@@ -1183,6 +1175,8 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                        close(fd);
        }
 
+       closedir(dir);
+
        if (!chosen)
                return 1;
 
@@ -1265,8 +1259,7 @@ static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                    !policy_action_allows(pol, st?st->ss->name:NULL,
                                          act_spare_same_slot)) {
                        if (verbose > 1)
-                               fprintf(stderr, Name ": %s is not bare, so not "
-                                       "considering as a spare\n",
+                               pr_err("%s is not bare, so not considering as a spare\n",
                                        devname);
                        return 1;
                }
@@ -1278,7 +1271,7 @@ static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
         * what arrays might be candidates.
         */
        if (st) {
-               /* just try try 'array' or 'partition' based on this metadata */
+               /* just try to add 'array' or 'partition' based on this metadata */
                if (st->ss->add_to_super)
                        return array_try_spare(devname, dfdp, pol, target, bare,
                                               st, verbose);
@@ -1309,7 +1302,7 @@ static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
        return rv;
 }
 
-int IncrementalScan(int verbose)
+int IncrementalScan(struct context *c, char *devnm)
 {
        /* look at every device listed in the 'map' file.
         * If one is found that is not running then:
@@ -1321,27 +1314,66 @@ int IncrementalScan(int verbose)
        struct map_ent *me;
        struct mddev_ident *devs, *mddev;
        int rv = 0;
+       char container[32];
+       char *only = NULL;
 
        map_read(&mapl);
        devs = conf_get_ident(NULL);
 
+restart:
        for (me = mapl ; me ; me = me->next) {
-               mdu_array_info_t array;
-               mdu_bitmap_file_t bmf;
                struct mdinfo *sra;
-               int mdfd = open_dev(me->devnum);
+               int mdfd;
+
+               if (devnm && strcmp(devnm, me->devnm) != 0)
+                       continue;
+               if (me->metadata[0] == '/') {
+                       char *sl;
+
+                       if (!devnm)
+                               continue;
+
+                       /* member array, need to work on container */
+                       strncpy(container, me->metadata+1, 32);
+                       container[31] = 0;
+                       sl = strchr(container, '/');
+                       if (sl)
+                               *sl = 0;
+                       only = devnm;
+                       devnm = container;
+                       goto restart;
+               }
+               mdfd = open_dev(me->devnm);
 
                if (mdfd < 0)
                        continue;
-               if (ioctl(mdfd, GET_ARRAY_INFO, &array) == 0 ||
-                   errno != ENODEV) {
+               if (!isdigit(me->metadata[0])) {
+                       /* must be a container */
+                       struct supertype *st = super_by_fd(mdfd, NULL);
+                       int ret = 0;
+                       struct map_ent *map = NULL;
+
+                       if (st && st->ss->load_container)
+                               ret = st->ss->load_container(st, mdfd, NULL);
+                       close(mdfd);
+                       if (!ret && st && st->ss->container_content) {
+                               if (map_lock(&map))
+                                       pr_err("failed to get exclusive lock on mapfile\n");
+                               ret = Incremental_container(st, me->path, c, only);
+                               map_unlock(&map);
+                       }
+                       if (ret)
+                               rv = 1;
+                       continue;
+               }
+               if (md_array_active(mdfd)) {
                        close(mdfd);
                        continue;
                }
                /* Ok, we can try this one.   Maybe it needs a bitmap */
                for (mddev = devs ; mddev ; mddev = mddev->next)
-                       if (mddev->devname && me->path
-                           && devname_matches(mddev->devname, me->path))
+                       if (mddev->devname && me->path &&
+                           devname_matches(mddev->devname, me->path))
                                break;
                if (mddev && mddev->bitmap_file) {
                        /*
@@ -1349,42 +1381,42 @@ int IncrementalScan(int verbose)
                         * is a hint only
                         */
                        int added = -1;
-                       if (ioctl(mdfd, GET_ARRAY_INFO, &bmf) < 0) {
-                               int bmfd = open(mddev->bitmap_file, O_RDWR);
-                               if (bmfd >= 0) {
-                                       added = ioctl(mdfd, SET_BITMAP_FILE,
-                                                     bmfd);
-                                       close(bmfd);
-                               }
+                       int bmfd;
+
+                       bmfd = open(mddev->bitmap_file, O_RDWR);
+                       if (bmfd >= 0) {
+                               added = ioctl(mdfd, SET_BITMAP_FILE, bmfd);
+                               close(bmfd);
                        }
-                       if (verbose >= 0) {
+                       if (c->verbose >= 0) {
                                if (added == 0)
-                                       fprintf(stderr, Name
-                                               ": Added bitmap %s to %s\n",
-                                               mddev->bitmap_file, me->path);
+                                       pr_err("Added bitmap %s to %s\n",
+                                              mddev->bitmap_file, me->path);
                                else if (errno != EEXIST)
-                                       fprintf(stderr, Name
-                                          ": Failed to add bitmap to %s: %s\n",
-                                               me->path, strerror(errno));
+                                       pr_err("Failed to add bitmap to %s: %s\n",
+                                              me->path, strerror(errno));
                        }
                }
-               sra = sysfs_read(mdfd, 0, 0);
+               /* FIXME check for reshape_active and consider not
+                * starting array.
+                */
+               sra = sysfs_read(mdfd, NULL, 0);
                if (sra) {
                        if (sysfs_set_str(sra, NULL,
                                          "array_state", "read-auto") == 0) {
-                               if (verbose >= 0)
-                                       fprintf(stderr, Name
-                                               ": started array %s\n",
-                                               me->path ?: devnum2devname(me->devnum));
+                               if (c->verbose >= 0)
+                                       pr_err("started array %s\n",
+                                              me->path ?: me->devnm);
                        } else {
-                               fprintf(stderr, Name
-                                       ": failed to start array %s: %s\n",
-                                       me->path ?: devnum2devname(me->devnum),
-                                       strerror(errno));
+                               pr_err("failed to start array %s: %s\n",
+                                      me->path ?: me->devnm,
+                                      strerror(errno));
                                rv = 1;
                        }
+                       sysfs_free(sra);
                }
        }
+       map_free(mapl);
        return rv;
 }
 
@@ -1395,18 +1427,18 @@ static char *container2devname(char *devname)
        if (devname[0] == '/') {
                int fd = open(devname, O_RDONLY);
                if (fd >= 0) {
-                       mdname = devnum2devname(fd2devnum(fd));
+                       mdname = xstrdup(fd2devnm(fd));
                        close(fd);
                }
        } else {
                int uuid[4];
                struct map_ent *mp, *map = NULL;
-                                       
+
                if (!parse_uuid(devname, uuid))
                        return mdname;
                mp = map_by_uuid(&map, uuid);
                if (mp)
-                       mdname = devnum2devname(mp->devnum);
+                       mdname = xstrdup(mp->devnm);
                map_free(map);
        }
 
@@ -1414,8 +1446,7 @@ static char *container2devname(char *devname)
 }
 
 static int Incremental_container(struct supertype *st, char *devname,
-                                char *homehost, int verbose,
-                                int runstop, int autof)
+                                struct context *c, char *only)
 {
        /* Collect the contents of this container and for each
         * array, choose a device name and assemble the array.
@@ -1432,26 +1463,31 @@ static int Incremental_container(struct supertype *st, char *devname,
        struct map_ent *smp;
        int suuid[4];
        int sfd;
+       int ra_blocked = 0;
+       int ra_all = 0;
+       int result = 0;
 
        st->ss->getinfo_super(st, &info, NULL);
 
-       if ((runstop > 0 && info.container_enough >= 0) ||
+       if ((c->runstop > 0 && info.container_enough >= 0) ||
            info.container_enough > 0)
                /* pass */;
        else {
-               if (verbose)
-                       fprintf(stderr, Name ": not enough devices to start the container\n");
+               if (c->export) {
+                       printf("MD_STARTED=no\n");
+               } else if (c->verbose)
+                       pr_err("not enough devices to start the container\n");
                return 0;
        }
 
-       match = search_mdstat(st, &info, devname, verbose, &rv);
+       match = conf_match(st, &info, devname, c->verbose, &rv);
        if (match == NULL && rv == 2)
                return rv;
 
        /* Need to compute 'trustworthy' */
        if (match)
                trustworthy = LOCAL;
-       else if (st->ss->match_home(st, homehost) == 1)
+       else if (st->ss->match_home(st, c->homehost) == 1)
                trustworthy = LOCAL;
        else if (st->ss->match_home(st, "any") == 1)
                trustworthy = LOCAL;
@@ -1459,33 +1495,36 @@ static int Incremental_container(struct supertype *st, char *devname,
                trustworthy = FOREIGN;
 
        list = st->ss->container_content(st, NULL);
-       if (map_lock(&map))
-               fprintf(stderr, Name ": failed to get exclusive lock on "
-                       "mapfile\n");
-       /* do not assemble arrays that might have bad blocks */
-       if (list->array.state & (1<<MD_SB_BBM_ERRORS)) {
-               fprintf(stderr, Name ": BBM log found in metadata. "
-                                       "Cannot activate array(s).\n");
-               /* free container data and exit */
-               sysfs_free(list);
-               return 2;
+       /* when nothing to activate - quit */
+       if (list == NULL) {
+               if (c->export) {
+                       printf("MD_STARTED=nothing\n");
+               }
+               return 0;
        }
-
        for (ra = list ; ra ; ra = ra->next) {
                int mdfd;
                char chosen_name[1024];
                struct map_ent *mp;
                struct mddev_ident *match = NULL;
 
+               ra_all++;
+               /* do not activate arrays blocked by metadata handler */
+               if (ra->array.state & (1 << MD_SB_BLOCK_VOLUME)) {
+                       pr_err("Cannot activate array %s in %s.\n",
+                               ra->text_version, devname);
+                       ra_blocked++;
+                       continue;
+               }
                mp = map_by_uuid(&map, ra->uuid);
 
                if (mp) {
-                       mdfd = open_dev(mp->devnum);
+                       mdfd = open_dev(mp->devnm);
                        if (mp->path)
                                strcpy(chosen_name, mp->path);
                        else
-                               strcpy(chosen_name, devnum2devname(mp->devnum));
-               } else {
+                               strcpy(chosen_name, mp->devnm);
+               } else if (!only) {
 
                        /* Check in mdadm.conf for container == devname and
                         * member == ra->text_version after second slash.
@@ -1519,40 +1558,69 @@ static int Incremental_container(struct supertype *st, char *devname,
                                free(dn);
                                /* we have a match */
                                match = array_list;
-                               if (verbose>0)
-                                       fprintf(stderr, Name ": match found for member %s\n",
+                               if (c->verbose>0)
+                                       pr_err("match found for member %s\n",
                                                array_list->member);
                                break;
                        }
 
                        if (match && match->devname &&
                            strcasecmp(match->devname, "<ignore>") == 0) {
-                               if (verbose > 0)
-                                       fprintf(stderr, Name ": array %s/%s is "
-                                               "explicitly ignored by mdadm.conf\n",
-                                               match->container, match->member);
-                               return 2;
+                               if (c->verbose > 0)
+                                       pr_err("array %s/%s is explicitly ignored by mdadm.conf\n",
+                                              match->container, match->member);
+                               continue;
                        }
                        if (match)
                                trustworthy = LOCAL;
 
                        mdfd = create_mddev(match ? match->devname : NULL,
                                            ra->name,
-                                           autof,
+                                           c->autof,
                                            trustworthy,
-                                           chosen_name);
+                                           chosen_name, 0);
                }
+               if (only && (!mp || strcmp(mp->devnm, only) != 0))
+                       continue;
 
                if (mdfd < 0) {
-                       fprintf(stderr, Name ": failed to open %s: %s.\n",
+                       pr_err("failed to open %s: %s.\n",
                                chosen_name, strerror(errno));
                        return 2;
                }
 
-               assemble_container_content(st, mdfd, ra, runstop,
-                                          chosen_name, verbose, NULL);
+               assemble_container_content(st, mdfd, ra, c,
+                                          chosen_name, &result);
+               map_free(map);
+               map = NULL;
                close(mdfd);
        }
+       if (c->export && result) {
+               char sep = '=';
+               printf("MD_STARTED");
+               if (result & INCR_NO) {
+                       printf("%cno", sep);
+                       sep = ',';
+               }
+               if (result & INCR_UNSAFE) {
+                       printf("%cunsafe", sep);
+                       sep = ',';
+               }
+               if (result & INCR_ALREADY) {
+                       printf("%calready", sep);
+                       sep = ',';
+               }
+               if (result & INCR_YES) {
+                       printf("%cyes", sep);
+                       sep = ',';
+               }
+               printf("\n");
+       }
+
+       /* don't move spares to container with volume being activated
+          when all volumes are blocked */
+       if (ra_all == ra_blocked)
+               return 0;
 
        /* Now move all suitable spares from spare container */
        domains = domain_from_array(list, st->ss->name);
@@ -1564,12 +1632,15 @@ static int Incremental_container(struct supertype *st, char *devname,
                struct supertype *sst =
                        super_imsm.match_metadata_desc("imsm");
                struct mdinfo *sinfo;
-               unsigned long long min_size = 0;
-               if (st->ss->min_acceptable_spare_size)
-                       min_size = st->ss->min_acceptable_spare_size(st);
+
                if (!sst->ss->load_container(sst, sfd, NULL)) {
+                       struct spare_criteria sc = {0, 0};
+
+                       if (st->ss->get_spare_criteria)
+                               st->ss->get_spare_criteria(st, &sc);
+
                        close(sfd);
-                       sinfo = container_choose_spares(sst, min_size,
+                       sinfo = container_choose_spares(sst, &sc,
                                                        domains, NULL,
                                                        st->ss->name, 0);
                        sst->ss->free_super(sst);
@@ -1590,19 +1661,69 @@ static int Incremental_container(struct supertype *st, char *devname,
                                        disks = disks->next;
                                }
                                if (count)
-                                       fprintf(stderr, Name
-                                               ": Added %d spare%s to %s\n",
-                                               count, count>1?"s":"", devname);
+                                       pr_err("Added %d spare%s to %s\n",
+                                              count, count>1?"s":"", devname);
                        }
                        sysfs_free(sinfo);
                } else
                        close(sfd);
        }
        domain_free(domains);
-       map_unlock(&map);
+       map_free(map);
        return 0;
 }
 
+/* Run "udisks <arg1> <arg2>" in a forked child and wait for that child only. */
+static void run_udisks(char *arg1, char *arg2)
+{
+       int pid = fork();
+       if (pid == 0) {
+               execl("/usr/bin/udisks", "udisks", arg1, arg2, NULL);
+               execl("/bin/udisks", "udisks", arg1, arg2, NULL);
+               _exit(1); /* both execs failed; skip atexit handlers/stdio flush */
+       }
+       while (pid > 0 && waitpid(pid, NULL, 0) < 0 && errno == EINTR)
+               ; /* retry only on EINTR so a failed wait cannot spin forever */
+}
+
+/* Ask udisks to unmount <devnm>, then call Manage_stop() on it; if that fails,
+ * send a "remove" uevent through <mdi>.  Returns Manage_stop()'s result. */
+static int force_remove(char *devnm, int fd, struct mdinfo *mdi, int verbose)
+{
+       int devid = devnm2devid(devnm);
+       int stop_rv;
+       run_udisks("--unmount", map_dev(major(devid), minor(devid), 0));
+       stop_rv = Manage_stop(devnm, fd, verbose, 1);
+       if (stop_rv == 0)
+               return 0;
+       sysfs_uevent(mdi, "remove"); /* at least try to trigger a 'remove' */
+       if (verbose)
+               pr_err("Fail to stop %s too.\n", devnm);
+       return stop_rv;
+}
+
+/* Run Manage_subdevs() with <devlist> on member array <memb>; if its result
+ * has bit value 2 set, fall back to force_remove().  No-op if open fails. */
+static void remove_from_member_array(struct mdstat_ent *memb,
+                                   struct mddev_dev *devlist, int verbose)
+{
+       struct mdinfo member_info;
+       int member_fd = open_dev(memb->devnm);
+
+       if (member_fd < 0)
+               return;
+       if (Manage_subdevs(memb->devnm, member_fd, devlist, verbose,
+                          0, NULL, 0) & 2) {
+               if (sysfs_init(&member_info, -1, memb->devnm) == 0)
+                       force_remove(memb->devnm, member_fd, &member_info,
+                                    verbose);
+               else
+                       pr_err("unable to initialize sysfs for: %s\n",
+                              memb->devnm);
+       }
+       close(member_fd);
+}
+
 /*
  * IncrementalRemove - Attempt to see if the passed in device belongs to any
  * raid arrays, and if so first fail (if needed) and then remove the device.
@@ -1616,36 +1737,51 @@ static int Incremental_container(struct supertype *st, char *devname,
 int IncrementalRemove(char *devname, char *id_path, int verbose)
 {
        int mdfd;
-       int rv;
+       int rv = 0;
        struct mdstat_ent *ent;
        struct mddev_dev devlist;
+       struct mdinfo mdi;
+       char buf[32];
 
        if (!id_path)
-               dprintf(Name ": incremental removal without --path <id_path> "
-                       "lacks the possibility to re-add new device in this "
-                       "port\n");
+               dprintf("incremental removal without --path <id_path> lacks the possibility to re-add new device in this port\n");
 
        if (strchr(devname, '/')) {
-               fprintf(stderr, Name ": incremental removal requires a "
-                       "kernel device name, not a file: %s\n", devname);
+               pr_err("incremental removal requires a kernel device name, not a file: %s\n", devname);
                return 1;
        }
        ent = mdstat_by_component(devname);
        if (!ent) {
-               fprintf(stderr, Name ": %s does not appear to be a component "
-                       "of any array\n", devname);
+               if (verbose >= 0)
+                       pr_err("%s does not appear to be a component of any array\n", devname);
                return 1;
        }
-       mdfd = open_dev(ent->devnum);
+       if (sysfs_init(&mdi, -1, ent->devnm)) {
+               pr_err("unable to initialize sysfs for: %s\n", devname);
+               return 1;
+       }
+       mdfd = open_dev_excl(ent->devnm);
+       if (mdfd > 0) {
+               close(mdfd);
+               if (sysfs_get_str(&mdi, NULL, "array_state",
+                                 buf, sizeof(buf)) > 0) {
+                       if (strncmp(buf, "active", 6) == 0 ||
+                           strncmp(buf, "clean", 5) == 0)
+                               sysfs_set_str(&mdi, NULL,
+                                             "array_state", "read-auto");
+               }
+       }
+       mdfd = open_dev(ent->devnm);
        if (mdfd < 0) {
-               fprintf(stderr, Name ": Cannot open array %s!!\n", ent->dev);
+               if (verbose >= 0)
+                       pr_err("Cannot open array %s!!\n", ent->devnm);
                free_mdstat(ent);
                return 1;
        }
 
        if (id_path) {
                struct map_ent *map = NULL, *me;
-               me = map_by_devnum(&map, ent->devnum);
+               me = map_by_devnm(&map, ent->devnm);
                if (me)
                        policy_save_path(id_path, me);
                map_free(map);
@@ -1659,21 +1795,28 @@ int IncrementalRemove(char *devname, char *id_path, int verbose)
            strncmp(ent->metadata_version, "external:", 9) == 0) {
                struct mdstat_ent *mdstat = mdstat_read(0, 0);
                struct mdstat_ent *memb;
-               for (memb = mdstat ; memb ; memb = memb->next)
-                       if (is_container_member(memb, ent->dev)) {
-                               int subfd = open_dev(memb->devnum);
-                               if (subfd >= 0) {
-                                       Manage_subdevs(memb->dev, subfd,
-                                                      &devlist, verbose, 0,
-                                                      NULL);
-                                       close(subfd);
-                               }
-                       }
+               for (memb = mdstat ; memb ; memb = memb->next) {
+                       if (is_container_member(memb, ent->devnm))
+                               remove_from_member_array(memb,
+                                       &devlist, verbose);
+               }
                free_mdstat(mdstat);
-       } else
-               Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL);
+       } else {
+               rv |= Manage_subdevs(ent->devnm, mdfd, &devlist,
+                                   verbose, 0, NULL, 0);
+               if (rv & 2) {
+               /* Failed due to EBUSY, try to stop the array.
+                * Give udisks a chance to unmount it first.
+                */
+                       rv = force_remove(ent->devnm, mdfd, &mdi, verbose);
+                       goto end;
+               }
+       }
+
        devlist.disposition = 'r';
-       rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL);
+       rv = Manage_subdevs(ent->devnm, mdfd, &devlist,
+                           verbose, 0, NULL, 0);
+end:
        close(mdfd);
        free_mdstat(ent);
        return rv;