]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Incremental.c
Create.c: fix uclibc build
[thirdparty/mdadm.git] / Incremental.c
index c9372587f518dc57693b1d04d746a723423e57b2..83db071214ee57ba507e5cfed73f85494cb9950e 100644 (file)
@@ -86,7 +86,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
         * - if number of OK devices match expected, or -R and there are enough,
         *   start the array (auto-readonly).
         */
-       struct stat stb;
+       dev_t rdev, rdev2;
        struct mdinfo info, dinfo;
        struct mdinfo *sra = NULL, *d;
        struct mddev_ident *match;
@@ -99,26 +99,16 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
        int active_disks;
        int trustworthy;
        char *name_to_use;
-       mdu_array_info_t ainf;
        struct dev_policy *policy = NULL;
        struct map_ent target_array;
        int have_target;
        char *devname = devlist->devname;
+       int journal_device_missing = 0;
 
        struct createinfo *ci = conf_get_create_info();
 
-       if (stat(devname, &stb) < 0) {
-               if (c->verbose >= 0)
-                       pr_err("stat failed for %s: %s.\n",
-                               devname, strerror(errno));
-               return rv;
-       }
-       if ((stb.st_mode & S_IFMT) != S_IFBLK) {
-               if (c->verbose >= 0)
-                       pr_err("%s is not a block device.\n",
-                               devname);
+       if (!stat_is_blkdev(devname, &rdev))
                return rv;
-       }
        dfd = dev_open(devname, O_RDONLY);
        if (dfd < 0) {
                if (c->verbose >= 0)
@@ -130,16 +120,13 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
        if (must_be_container(dfd)) {
                if (!st)
                        st = super_by_fd(dfd, NULL);
-               if (st)
-                       st->ignore_hw_compat = 1;
                if (st && st->ss->load_container)
                        rv = st->ss->load_container(st, dfd, NULL);
 
                close(dfd);
                if (!rv && st->ss->container_content) {
                        if (map_lock(&map))
-                               pr_err("failed to get "
-                                      "exclusive lock on mapfile\n");
+                               pr_err("failed to get exclusive lock on mapfile\n");
                        if (c->export)
                                printf("MD_DEVNAME=%s\n", devname);
                        rv = Incremental_container(st, devname, c, NULL);
@@ -160,10 +147,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
        if (!devlist) {
                devlist = conf_get_devs();
                for (;devlist; devlist = devlist->next) {
-                       struct stat st2;
-                       if (stat(devlist->devname, &st2) == 0 &&
-                           (st2.st_mode & S_IFMT) == S_IFBLK &&
-                           st2.st_rdev == stb.st_rdev)
+                       if (stat_is_blkdev(devlist->devname, &rdev2) &&
+                           rdev2 == rdev)
                                break;
                }
        }
@@ -177,37 +162,28 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
        /* 2/ Find metadata, reject if none appropriate (check
         *            version/name from args) */
 
-       if (fstat(dfd, &stb) < 0) {
-               if (c->verbose >= 0)
-                       pr_err("fstat failed for %s: %s.\n",
-                               devname, strerror(errno));
+       if (!fstat_is_blkdev(dfd, devname, &rdev))
                goto out;
-       }
-       if ((stb.st_mode & S_IFMT) != S_IFBLK) {
-               if (c->verbose >= 0)
-                       pr_err("%s is not a block device.\n",
-                               devname);
-               goto out;
-       }
 
-       dinfo.disk.major = major(stb.st_rdev);
-       dinfo.disk.minor = minor(stb.st_rdev);
+       dinfo.disk.major = major(rdev);
+       dinfo.disk.minor = minor(rdev);
 
        policy = disk_policy(&dinfo);
        have_target = policy_check_path(&dinfo, &target_array);
 
-       if (st == NULL && (st = guess_super(dfd)) == NULL) {
+       if (st == NULL && (st = guess_super_type(dfd, guess_array)) == NULL) {
                if (c->verbose >= 0)
                        pr_err("no recognisable superblock on %s.\n",
                               devname);
                rv = try_spare(devname, &dfd, policy,
                               have_target ? &target_array : NULL,
-                              st, c->verbose);
+                              NULL, c->verbose);
                goto out;
        }
-       st->ignore_hw_compat = 1;
+       st->ignore_hw_compat = 0;
+
        if (st->ss->compare_super == NULL ||
-           st->ss->load_super(st, dfd, NULL)) {
+           st->ss->load_super(st, dfd, c->verbose >= 0 ? devname : NULL)) {
                if (c->verbose >= 0)
                        pr_err("no RAID superblock on %s.\n",
                                devname);
@@ -226,11 +202,9 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
        if (!match && rv == 2)
                goto out;
 
-       if (match && match->devname
-           && strcasecmp(match->devname, "<ignore>") == 0) {
+       if (match && match->devname && is_devname_ignore(match->devname) == true) {
                if (c->verbose >= 0)
-                       pr_err("array containing %s is explicitly"
-                               " ignored by mdadm.conf\n",
+                       pr_err("array containing %s is explicitly ignored by mdadm.conf\n",
                                devname);
                goto out;
        }
@@ -251,8 +225,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
        if (!match && !conf_test_metadata(st->ss->name, policy,
                                          (trustworthy == LOCAL))) {
                if (c->verbose >= 1)
-                       pr_err("%s has metadata type %s for which "
-                              "auto-assembly is disabled\n",
+                       pr_err("%s has metadata type %s for which auto-assembly is disabled\n",
                               devname, st->ss->name);
                goto out;
        }
@@ -270,8 +243,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
                c->autof = ci->autof;
 
        name_to_use = info.name;
-       if (name_to_use[0] == 0 &&
-           info.array.level == LEVEL_CONTAINER) {
+       if (name_to_use[0] == 0 && is_container(info.array.level)) {
                name_to_use = info.text_version;
                trustworthy = METADATA;
        }
@@ -289,8 +261,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
        /* 4/ Check if array exists.
         */
        if (map_lock(&map))
-               pr_err("failed to get exclusive lock on "
-                       "mapfile\n");
+               pr_err("failed to get exclusive lock on mapfile\n");
        /* Now check we can get O_EXCL.  If not, probably "mdadm -A" has
         * taken over
         */
@@ -317,14 +288,25 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
 
        if (mdfd < 0) {
 
+               /* Skip the clustered ones. These should be started by
+                * clustering resource agents
+                */
+               if (info.array.state & (1 << MD_SB_CLUSTERED))
+                       goto out;
+
                /* Couldn't find an existing array, maybe make a new one */
                mdfd = create_mddev(match ? match->devname : NULL,
-                                   name_to_use, c->autof, trustworthy, chosen_name);
+                                   name_to_use, c->autof, trustworthy, chosen_name, 0);
 
                if (mdfd < 0)
                        goto out_unlock;
 
-               sysfs_init(&info, mdfd, NULL);
+               if (sysfs_init(&info, mdfd, NULL)) {
+                       pr_err("unable to initialize sysfs for %s\n",
+                              chosen_name);
+                       rv = 2;
+                       goto out_unlock;
+               }
 
                if (set_array_info(mdfd, st, &info) != 0) {
                        pr_err("failed to set array info for %s: %s\n",
@@ -334,8 +316,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
                }
 
                dinfo = info;
-               dinfo.disk.major = major(stb.st_rdev);
-               dinfo.disk.minor = minor(stb.st_rdev);
+               dinfo.disk.major = major(rdev);
+               dinfo.disk.minor = minor(rdev);
                if (add_disk(mdfd, st, &info, &dinfo) != 0) {
                        pr_err("failed to add %s to new array %s: %s.\n",
                                devname, chosen_name, strerror(errno));
@@ -352,8 +334,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
                         * So reject it.
                         */
                        ioctl(mdfd, STOP_ARRAY, NULL);
-                       pr_err("You have an old buggy kernel which cannot support\n"
-                              "      --incremental reliably.  Aborting.\n");
+                       pr_err("You have an old buggy kernel which cannot support\n      --incremental reliably.  Aborting.\n");
                        rv = 2;
                        goto out_unlock;
                }
@@ -392,12 +373,11 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
                 * flag has a different meaning.  The test has to happen
                 * at the device level there
                 */
-               if (!st->ss->external
-                   && (info.disk.state & (1<<MD_DISK_SYNC)) != 0
-                   && ! policy_action_allows(policy, st->ss->name,
-                                             act_re_add)
-                   && c->runstop < 1) {
-                       if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) {
+               if (!st->ss->external &&
+                   (info.disk.state & (1 << MD_DISK_SYNC)) != 0 &&
+                   !policy_action_allows(policy, st->ss->name, act_re_add) &&
+                   c->runstop < 1) {
+                       if (md_array_active(mdfd)) {
                                pr_err("not adding %s to active array (without --run) %s\n",
                                       devname, chosen_name);
                                rv = 2;
@@ -419,9 +399,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
                        }
                        st2 = dup_super(st);
                        if (st2->ss->load_super(st2, dfd2, NULL) ||
-                           st->ss->compare_super(st, st2) != 0) {
-                               pr_err("metadata mismatch between %s and "
-                                      "chosen array %s\n",
+                           st->ss->compare_super(st, st2, 1) != 0) {
+                               pr_err("metadata mismatch between %s and chosen array %s\n",
                                       devname, chosen_name);
                                close(dfd2);
                                rv = 2;
@@ -439,11 +418,15 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
                                goto out_unlock;
                        }
                }
-               info.disk.major = major(stb.st_rdev);
-               info.disk.minor = minor(stb.st_rdev);
+               info.disk.major = major(rdev);
+               info.disk.minor = minor(rdev);
                /* add disk needs to know about containers */
                if (st->ss->external)
                        sra->array.level = LEVEL_CONTAINER;
+
+               if (info.array.state & (1 << MD_SB_CLUSTERED))
+                       info.disk.state |= (1 << MD_DISK_CLUSTER_ADD);
+
                err = add_disk(mdfd, st, sra, &info);
                if (err < 0 && errno == EBUSY) {
                        /* could be another device present with the same
@@ -476,8 +459,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
                        info.array.working_disks ++;
 
        }
-       if (strncmp(chosen_name, "/dev/md/", 8) == 0)
-               md_devname = chosen_name+8;
+       if (strncmp(chosen_name, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0)
+               md_devname = chosen_name + DEV_MD_DIR_LEN;
        else
                md_devname = chosen_name;
        if (c->export) {
@@ -488,7 +471,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
 
        /* 7/ Is there enough devices to possibly start the array? */
        /* 7a/ if not, finish with success. */
-       if (info.array.level == LEVEL_CONTAINER) {
+       if (is_container(info.array.level)) {
                char devnm[32];
                /* Try to assemble within the container */
                sysfs_uevent(sra, "change");
@@ -496,6 +479,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
                        pr_err("container %s now has %d device%s\n",
                               chosen_name, info.array.working_disks,
                               info.array.working_disks == 1?"":"s");
+               sysfs_rules_apply(chosen_name, &info);
                wait_for(chosen_name, mdfd);
                if (st->ss->external)
                        strcpy(devnm, fd2devnm(mdfd));
@@ -521,9 +505,17 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
        sra = sysfs_read(mdfd, NULL, (GET_DEVS | GET_STATE |
                                    GET_OFFSET | GET_SIZE));
        active_disks = count_active(st, sra, mdfd, &avail, &info);
+
+       if (!avail)
+               goto out_unlock;
+
+       journal_device_missing = (info.journal_device_required) && (info.journal_clean == 0);
+
+       if (info.consistency_policy == CONSISTENCY_POLICY_PPL)
+               info.array.state |= 1;
+
        if (enough(info.array.level, info.array.raid_disks,
-                  info.array.layout, info.array.state & 1,
-                  avail) == 0) {
+                  info.array.layout, info.array.state & 1, avail) == 0) {
                if (c->export) {
                        printf("MD_STARTED=no\n");
                } else if (c->verbose >= 0)
@@ -539,7 +531,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
        /*   + add any bitmap file  */
        /*   + start the array (auto-readonly). */
 
-       if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) {
+       if (md_array_active(mdfd)) {
                if (c->export) {
                        printf("MD_STARTED=already\n");
                } else if (c->verbose >= 0)
@@ -550,10 +542,12 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
        }
 
        map_unlock(&map);
-       if (c->runstop > 0 || active_disks >= info.array.working_disks) {
+       if (c->runstop > 0 || (!journal_device_missing && active_disks >= info.array.working_disks)) {
                struct mdinfo *dsk;
                /* Let's try to start it */
 
+               if (journal_device_missing)
+                       pr_err("Trying to run with missing journal device\n");
                if (info.reshape_active && !(info.reshape_active & RESHAPE_NO_BACKUP)) {
                        pr_err("%s: This array is being reshaped and cannot be started\n",
                               chosen_name);
@@ -582,8 +576,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
                        if (d->disk.state & (1<<MD_DISK_REMOVED))
                                remove_disk(mdfd, st, sra, d);
 
-               if ((sra == NULL || active_disks >= info.array.working_disks)
-                   && trustworthy != FOREIGN)
+               if ((sra == NULL || active_disks >= info.array.working_disks) &&
+                   trustworthy != FOREIGN)
                        rv = ioctl(mdfd, RUN_ARRAY, NULL);
                else
                        rv = sysfs_set_str(sra, NULL,
@@ -607,7 +601,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
                         * those devices we should re-add them now.
                         */
                        for (dsk = sra->devs; dsk ; dsk = dsk->next) {
-                               if (disk_action_allows(dsk, st->ss->name, act_re_add) &&
+                               if (disk_action_allows(dsk, st->ss->name,
+                                                      act_re_add) &&
                                    add_disk(mdfd, st, sra, dsk) == 0)
                                        pr_err("%s re-added to %s\n",
                                               dsk->sys_name, chosen_name);
@@ -620,6 +615,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
        } else {
                if (c->export) {
                        printf("MD_STARTED=unsafe\n");
+               } else if (journal_device_missing) {
+                       pr_err("Journal device is missing, not safe to start yet.\n");
                } else if (c->verbose >= 0)
                        pr_err("%s attached to %s, not enough to start safely.\n",
                               devname, chosen_name);
@@ -633,8 +630,7 @@ out:
                close(mdfd);
        if (policy)
                dev_policy_free(policy);
-       if (sra)
-               sysfs_free(sra);
+       sysfs_free(sra);
        return rv;
 out_unlock:
        map_unlock(&map);
@@ -649,14 +645,13 @@ static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
         * and events less than the passed events, and remove the device.
         */
        struct mdinfo *d;
-       mdu_array_info_t ra;
 
-       if (ioctl(mdfd, GET_ARRAY_INFO, &ra) == 0)
+       if (md_array_active(mdfd))
                return; /* not safe to remove from active arrays
                         * without thinking more */
 
        for (d = sra->devs; d ; d = d->next) {
-               char dn[10];
+               char dn[24]; // 2*11 bytes for ints (including sign) + colon + null byte
                int dfd;
                struct mdinfo info;
                sprintf(dn, "%d:%d", d->disk.major, d->disk.minor);
@@ -671,12 +666,11 @@ static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
                st->ss->free_super(st);
                close(dfd);
 
-               if (info.disk.number != number ||
-                   info.events >= events)
+               if (info.disk.number != number || info.events >= events)
                        continue;
 
                if (d->disk.raid_disk > -1)
-                       sysfs_set_str(sra, d, "slot", "none");
+                       sysfs_set_str(sra, d, "slot", STR_COMMON_NONE);
                if (sysfs_set_str(sra, d, "state", "remove") == 0)
                        if (verbose >= 0)
                                pr_err("removing old device %s from %s\n",
@@ -693,6 +687,7 @@ static int count_active(struct supertype *st, struct mdinfo *sra,
        int cnt = 0;
        int replcnt = 0;
        __u64 max_events = 0;
+       __u64 max_journal_events = 0;
        char *avail = NULL;
        int *best = NULL;
        char *devmap = NULL;
@@ -720,8 +715,12 @@ static int count_active(struct supertype *st, struct mdinfo *sra,
                close(dfd);
                if (ok != 0)
                        continue;
+
                info.array.raid_disks = raid_disks;
                st->ss->getinfo_super(st, &info, devmap + raid_disks * devnum);
+               if (info.disk.raid_disk == MD_DISK_ROLE_JOURNAL &&
+                   info.events > max_journal_events)
+                       max_journal_events = info.events;
                if (!avail) {
                        raid_disks = info.array.raid_disks;
                        avail = xcalloc(raid_disks, 1);
@@ -771,6 +770,9 @@ static int count_active(struct supertype *st, struct mdinfo *sra,
                        replcnt++;
                st->ss->free_super(st);
        }
+       if (max_journal_events >= max_events - 1)
+               bestinfo->journal_clean = 1;
+
        if (!avail)
                return 0;
        /* We need to reject any device that thinks the best device is
@@ -807,28 +809,76 @@ static int count_active(struct supertype *st, struct mdinfo *sra,
 }
 
 /* test if container has degraded member(s) */
-static int container_members_max_degradation(struct map_ent *map, struct map_ent *me)
+static int
+container_members_max_degradation(struct map_ent *map, struct map_ent *me)
 {
-       mdu_array_info_t array;
-       int afd;
-       int max_degraded = 0;
+       struct mdinfo *sra;
+       int degraded, max_degraded = 0;
 
        for(; map; map = map->next) {
                if (!metadata_container_matches(map->metadata, me->devnm))
                        continue;
-               afd = open_dev(map->devnm);
-               if (afd < 0)
-                       continue;
                /* most accurate information regarding array degradation */
-               if (ioctl(afd, GET_ARRAY_INFO, &array) >= 0) {
-                       int degraded = array.raid_disks - array.active_disks -
-                                      array.spare_disks;
-                       if (degraded > max_degraded)
-                               max_degraded = degraded;
-               }
-               close(afd);
+               sra = sysfs_read(-1, map->devnm,
+                                GET_DISKS | GET_DEVS | GET_STATE);
+               if (!sra)
+                       continue;
+               degraded = sra->array.raid_disks - sra->array.active_disks -
+                       sra->array.spare_disks;
+               if (degraded > max_degraded)
+                       max_degraded = degraded;
+               sysfs_free(sra);
        }
-       return (max_degraded);
+
+       return max_degraded;
+}
+
+/**
+ * incremental_external_test_spare_criteria() - helper to test spare criteria.
+ * @st: supertype, must be not NULL, it is duplicated here.
+ * @container_devnm: devnm of the container.
+ * @disk_fd: file descriptor of device to be tested.
+ * @verbose: verbose flag.
+ *
+ * The function is used on the new drive verification path to check if the drive can be added to
+ * an external container. To test spare criteria, metadata must be loaded. The supertype is
+ * duplicated so that the original one is left untouched.
+ * Function is executed if superblock supports get_spare_criteria(), otherwise success is returned.
+ */
+mdadm_status_t incremental_external_test_spare_criteria(struct supertype *st, char *container_devnm,
+                                                       int disk_fd, int verbose)
+{
+       mdadm_status_t rv = MDADM_STATUS_ERROR;
+       char container_devname[PATH_MAX];
+       struct spare_criteria sc = {0};
+       struct supertype *dup;
+
+       if (!st->ss->get_spare_criteria)
+               return MDADM_STATUS_SUCCESS;
+
+       dup = dup_super(st);
+       snprintf(container_devname, PATH_MAX, "/dev/%s", container_devnm);
+
+       if (dup->ss->get_spare_criteria(dup, container_devname, &sc) != 0) {
+               if (verbose > 1)
+                       pr_err("Failed to get spare criteria for %s\n", container_devname);
+               goto out;
+       }
+
+       if (!disk_fd_matches_criteria(dup, disk_fd, &sc)) {
+               if (verbose > 1)
+                       pr_err("Disk does not match spare criteria for %s\n", container_devname);
+               goto out;
+       }
+
+       rv = MDADM_STATUS_SUCCESS;
+
+out:
+       dev_policy_free(sc.pols);
+       dup->ss->free_super(dup);
+       free(dup);
+
+       return rv;
 }
 
 static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
@@ -846,12 +896,12 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
         * Return 0 on success, or some exit code on failure, probably 1.
         */
        int rv = 1;
-       struct stat stb;
+       dev_t rdev;
        struct map_ent *mp, *map = NULL;
        struct mdinfo *chosen = NULL;
        int dfd = *dfdp;
 
-       if (fstat(dfd, &stb) != 0)
+       if (!fstat_is_blkdev(dfd, devname, &rdev))
                return 1;
 
        /*
@@ -864,16 +914,14 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
         */
 
        if (map_lock(&map)) {
-               pr_err("failed to get exclusive lock on "
-                       "mapfile\n");
+               pr_err("failed to get exclusive lock on mapfile\n");
                return 1;
        }
        for (mp = map ; mp ; mp = mp->next) {
                struct supertype *st2;
                struct domainlist *dl = NULL;
                struct mdinfo *sra;
-               unsigned long long devsize;
-               unsigned long long component_size = 0;
+               unsigned long long freesize = 0;
 
                if (is_subarray(mp->metadata))
                        continue;
@@ -892,16 +940,10 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                }
                sra = sysfs_read(-1, mp->devnm,
                                 GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
-                                GET_DEGRADED|GET_COMPONENT|GET_VERSION);
-               if (!sra) {
-                       /* Probably a container - no degraded info */
-                       sra = sysfs_read(-1, mp->devnm,
-                                        GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
-                                        GET_COMPONENT|GET_VERSION);
-                       if (sra)
-                               sra->array.failed_disks = -1;
-               }
-               if (!sra)
+                                GET_COMPONENT|GET_VERSION);
+               if (sra)
+                       sra->array.failed_disks = -1;
+               else
                        continue;
                if (st == NULL) {
                        int i;
@@ -911,8 +953,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                                        sra->text_version);
                        if (!st2) {
                                if (verbose > 1)
-                                       pr_err("not adding %s to %s"
-                                               " as metadata not recognised.\n",
+                                       pr_err("not adding %s to %s as metadata not recognised.\n",
                                                devname, mp->path);
                                goto next;
                        }
@@ -931,32 +972,19 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                if (sra->array.failed_disks == -1)
                        sra->array.failed_disks = container_members_max_degradation(map, mp);
 
-               get_dev_size(dfd, NULL, &devsize);
                if (sra->component_size == 0) {
-                       /* true for containers, here we must read superblock
-                        * to obtain minimum spare size */
-                       struct supertype *st3 = dup_super(st2);
-                       int mdfd = open_dev(mp->devnm);
-                       if (mdfd < 0) {
-                               free(st3);
+                       /* true for containers */
+                       if (incremental_external_test_spare_criteria(st2, mp->devnm, dfd, verbose))
                                goto next;
-                       }
-                       if (st3->ss->load_container &&
-                           !st3->ss->load_container(st3, mdfd, mp->path)) {
-                               component_size = st3->ss->min_acceptable_spare_size(st3);
-                               st3->ss->free_super(st3);
-                       }
-                       free(st3);
-                       close(mdfd);
                }
-               if ((sra->component_size > 0 &&
-                    st2->ss->avail_size(st2, devsize,
-                                        sra->devs
-                                        ? sra->devs->data_offset
-                                        : INVALID_SECTORS)
-                    < sra->component_size)
-                   ||
-                   (sra->component_size == 0 && devsize < component_size)) {
+
+               if (sra->component_size > 0 &&
+                   st2->ss->validate_geometry(st2, sra->array.level, sra->array.layout,
+                                               sra->array.raid_disks, &sra->array.chunk_size,
+                                               sra->component_size,
+                                               sra->devs ? sra->devs->data_offset : INVALID_SECTORS,
+                                               devname, &freesize, sra->consistency_policy,
+                                               0) && freesize < sra->component_size) {
                        if (verbose > 1)
                                pr_err("not adding %s to %s as it is too small\n",
                                        devname, mp->path);
@@ -976,8 +1004,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                                   sizeof(target->uuid)) == 0 &&
                            sra->array.failed_disks > 0) {
                                /* This is our target!! */
-                               if (chosen)
-                                       sysfs_free(chosen);
+                               sysfs_free(chosen);
                                chosen = sra;
                                sra = NULL;
                                /* skip to end so we don't check any more */
@@ -994,8 +1021,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                if (domain_test(dl, pol, st2->ss->name) != 1) {
                        /* domain test fails */
                        if (verbose > 1)
-                               pr_err("not adding %s to %s as"
-                                       " it is not in a compatible domain\n",
+                               pr_err("not adding %s to %s as it is not in a compatible domain\n",
                                        devname, mp->path);
 
                        goto next;
@@ -1010,8 +1036,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                        sra = NULL;
                }
        next:
-               if (sra)
-                       sysfs_free(sra);
+               sysfs_free(sra);
                if (st != st2)
                        free(st2);
                if (dl)
@@ -1022,18 +1047,19 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                int mdfd = open_dev(chosen->sys_name);
                if (mdfd >= 0) {
                        struct mddev_dev devlist;
-                       char devname[20];
+                       char chosen_devname[24]; // 2*11 for int (including signs) + colon + null
                        devlist.next = NULL;
                        devlist.used = 0;
-                       devlist.writemostly = 0;
-                       devlist.devname = devname;
-                       sprintf(devname, "%d:%d", major(stb.st_rdev),
-                               minor(stb.st_rdev));
+                       devlist.writemostly = FlagDefault;
+                       devlist.failfast = FlagDefault;
+                       devlist.devname = chosen_devname;
+                       sprintf(chosen_devname, "%d:%d", major(rdev),
+                               minor(rdev));
                        devlist.disposition = 'a';
                        close(dfd);
                        *dfdp = -1;
                        rv =  Manage_subdevs(chosen->sys_name, mdfd, &devlist,
-                                            -1, 0, NULL, 0);
+                                            -1, 0, UOPT_UNDEFINED, 0);
                        close(mdfd);
                }
                if (verbose > 0) {
@@ -1089,9 +1115,9 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                struct supertype *st2 = NULL;
                char *devname = NULL;
                unsigned long long devsectors;
+               char *pathlist[2];
 
-               if (de->d_ino == 0 ||
-                   de->d_name[0] == '.' ||
+               if (de->d_ino == 0 || de->d_name[0] == '.' ||
                    (de->d_type != DT_LNK && de->d_type != DT_UNKNOWN))
                        goto next;
 
@@ -1104,7 +1130,9 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                        /* This is a partition - skip it */
                        goto next;
 
-               pol2 = path_policy(de->d_name, type_disk);
+               pathlist[0] = de->d_name;
+               pathlist[1] = NULL;
+               pol2 = path_policy(pathlist, type_disk);
 
                domain_merge(&domlist, pol2, st ? st->ss->name : NULL);
                if (domain_test(domlist, pol, st ? st->ss->name : NULL) != 1)
@@ -1129,9 +1157,9 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                        st2 = dup_super(st);
                else
                        st2 = guess_super_type(fd, guess_partitions);
-               if (st2 == NULL ||
-                   st2->ss->load_super(st2, fd, NULL) < 0)
+               if (st2 == NULL || st2->ss->load_super(st2, fd, NULL) < 0)
                        goto next;
+               st2->ignore_hw_compat = 0;
 
                if (!st) {
                        /* Check domain policy again, this time referring to metadata */
@@ -1157,8 +1185,7 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                 * metadata which makes better use of the device can
                 * be found.
                 */
-               if (chosen == NULL ||
-                   chosen_size < info.component_size) {
+               if (chosen == NULL || chosen_size < info.component_size) {
                        chosen_size = info.component_size;
                        free(chosen);
                        chosen = devname;
@@ -1267,8 +1294,7 @@ static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                    !policy_action_allows(pol, st?st->ss->name:NULL,
                                          act_spare_same_slot)) {
                        if (verbose > 1)
-                               pr_err("%s is not bare, so not "
-                                       "considering as a spare\n",
+                               pr_err("%s is not bare, so not considering as a spare\n",
                                        devname);
                        return 1;
                }
@@ -1280,7 +1306,7 @@ static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
         * what arrays might be candidates.
         */
        if (st) {
-               /* just try try 'array' or 'partition' based on this metadata */
+               /* just try to add 'array' or 'partition' based on this metadata */
                if (st->ss->add_to_super)
                        return array_try_spare(devname, dfdp, pol, target, bare,
                                               st, verbose);
@@ -1331,15 +1357,17 @@ int IncrementalScan(struct context *c, char *devnm)
 
 restart:
        for (me = mapl ; me ; me = me->next) {
-               mdu_array_info_t array;
-               mdu_bitmap_file_t bmf;
                struct mdinfo *sra;
                int mdfd;
 
                if (devnm && strcmp(devnm, me->devnm) != 0)
                        continue;
-               if (devnm && me->metadata[0] == '/') {
+               if (me->metadata[0] == '/') {
                        char *sl;
+
+                       if (!devnm)
+                               continue;
+
                        /* member array, need to work on container */
                        strncpy(container, me->metadata+1, 32);
                        container[31] = 0;
@@ -1352,19 +1380,18 @@ restart:
                }
                mdfd = open_dev(me->devnm);
 
-               if (mdfd < 0)
+               if (!is_fd_valid(mdfd))
                        continue;
                if (!isdigit(me->metadata[0])) {
                        /* must be a container */
                        struct supertype *st = super_by_fd(mdfd, NULL);
                        int ret = 0;
                        struct map_ent *map = NULL;
-                       if (st)
-                               st->ignore_hw_compat = 1;
+
                        if (st && st->ss->load_container)
                                ret = st->ss->load_container(st, mdfd, NULL);
-                       close(mdfd);
-                       if (!ret && st->ss->container_content) {
+                       close_fd(&mdfd);
+                       if (!ret && st && st->ss->container_content) {
                                if (map_lock(&map))
                                        pr_err("failed to get exclusive lock on mapfile\n");
                                ret = Incremental_container(st, me->path, c, only);
@@ -1374,15 +1401,14 @@ restart:
                                rv = 1;
                        continue;
                }
-               if (ioctl(mdfd, GET_ARRAY_INFO, &array) == 0 ||
-                   errno != ENODEV) {
-                       close(mdfd);
+               if (md_array_active(mdfd)) {
+                       close_fd(&mdfd);
                        continue;
                }
                /* Ok, we can try this one.   Maybe it needs a bitmap */
                for (mddev = devs ; mddev ; mddev = mddev->next)
-                       if (mddev->devname && me->path
-                           && devname_matches(mddev->devname, me->path))
+                       if (mddev->devname && me->path &&
+                           devname_matches(mddev->devname, me->path))
                                break;
                if (mddev && mddev->bitmap_file) {
                        /*
@@ -1390,13 +1416,12 @@ restart:
                         * is a hint only
                         */
                        int added = -1;
-                       if (ioctl(mdfd, GET_ARRAY_INFO, &bmf) < 0) {
-                               int bmfd = open(mddev->bitmap_file, O_RDWR);
-                               if (bmfd >= 0) {
-                                       added = ioctl(mdfd, SET_BITMAP_FILE,
-                                                     bmfd);
-                                       close(bmfd);
-                               }
+                       int bmfd;
+
+                       bmfd = open(mddev->bitmap_file, O_RDWR);
+                       if (is_fd_valid(bmfd)) {
+                               added = ioctl(mdfd, SET_BITMAP_FILE, bmfd);
+                               close_fd(&bmfd);
                        }
                        if (c->verbose >= 0) {
                                if (added == 0)
@@ -1425,7 +1450,9 @@ restart:
                        }
                        sysfs_free(sra);
                }
+               close_fd(&mdfd);
        }
+       map_free(mapl);
        return rv;
 }
 
@@ -1468,24 +1495,16 @@ static int Incremental_container(struct supertype *st, char *devname,
        int trustworthy;
        struct mddev_ident *match;
        int rv = 0;
-       struct domainlist *domains;
-       struct map_ent *smp;
-       int suuid[4];
-       int sfd;
-       int ra_blocked = 0;
-       int ra_all = 0;
        int result = 0;
 
        st->ss->getinfo_super(st, &info, NULL);
 
-       if ((c->runstop > 0 && info.container_enough >= 0) ||
-           info.container_enough > 0)
-               /* pass */;
-       else {
-               if (c->export) {
+       if (info.container_enough < 0 || (info.container_enough == 0 && c->runstop < 1)) {
+               if (c->export)
                        printf("MD_STARTED=no\n");
-               } else if (c->verbose)
-                       pr_err("not enough devices to start the container\n");
+               else if (c->verbose)
+                       pr_err("Not enough devices to start the container.\n");
+
                return 0;
        }
 
@@ -1512,23 +1531,27 @@ static int Incremental_container(struct supertype *st, char *devname,
                return 0;
        }
        for (ra = list ; ra ; ra = ra->next) {
-               int mdfd;
+               int mdfd = -1;
                char chosen_name[1024];
                struct map_ent *mp;
                struct mddev_ident *match = NULL;
 
-               ra_all++;
                /* do not activate arrays blocked by metadata handler */
                if (ra->array.state & (1 << MD_SB_BLOCK_VOLUME)) {
                        pr_err("Cannot activate array %s in %s.\n",
                                ra->text_version, devname);
-                       ra_blocked++;
                        continue;
                }
                mp = map_by_uuid(&map, ra->uuid);
 
                if (mp) {
                        mdfd = open_dev(mp->devnm);
+                       if (!is_fd_valid(mdfd)) {
+                               pr_err("failed to open %s: %s.\n",
+                                      mp->devnm, strerror(errno));
+                               rv = 2;
+                               goto release;
+                       }
                        if (mp->path)
                                strcpy(chosen_name, mp->path);
                        else
@@ -1573,11 +1596,9 @@ static int Incremental_container(struct supertype *st, char *devname,
                                break;
                        }
 
-                       if (match && match->devname &&
-                           strcasecmp(match->devname, "<ignore>") == 0) {
+                       if (match && match->devname && is_devname_ignore(match->devname) == true) {
                                if (c->verbose > 0)
-                                       pr_err("array %s/%s is "
-                                              "explicitly ignored by mdadm.conf\n",
+                                       pr_err("array %s/%s is explicitly ignored by mdadm.conf\n",
                                               match->container, match->member);
                                continue;
                        }
@@ -1588,20 +1609,26 @@ static int Incremental_container(struct supertype *st, char *devname,
                                            ra->name,
                                            c->autof,
                                            trustworthy,
-                                           chosen_name);
+                                           chosen_name, 0);
+
+                       if (!is_fd_valid(mdfd)) {
+                               pr_err("create_mddev failed with chosen name %s: %s.\n",
+                                      chosen_name, strerror(errno));
+                               rv = 2;
+                               goto release;
+                       }
                }
-               if (only && (!mp || strcmp(mp->devnm, only) != 0))
-                       continue;
 
-               if (mdfd < 0) {
-                       pr_err("failed to open %s: %s.\n",
-                               chosen_name, strerror(errno));
-                       return 2;
+               if (only && (!mp || strcmp(mp->devnm, only) != 0)) {
+                       close_fd(&mdfd);
+                       continue;
                }
 
                assemble_container_content(st, mdfd, ra, c,
                                           chosen_name, &result);
-               close(mdfd);
+               map_free(map);
+               map = NULL;
+               close_fd(&mdfd);
        }
        if (c->export && result) {
                char sep = '=';
@@ -1625,69 +1652,26 @@ static int Incremental_container(struct supertype *st, char *devname,
                printf("\n");
        }
 
-       /* don't move spares to container with volume being activated
-          when all volumes are blocked */
-       if (ra_all == ra_blocked)
-               return 0;
-
-       /* Now move all suitable spares from spare container */
-       domains = domain_from_array(list, st->ss->name);
-       memcpy(suuid, uuid_zero, sizeof(int[4]));
-       if (domains &&
-           (smp = map_by_uuid(&map, suuid)) != NULL &&
-           (sfd = open(smp->path, O_RDONLY)) >= 0) {
-               /* spare container found */
-               struct supertype *sst =
-                       super_imsm.match_metadata_desc("imsm");
-               struct mdinfo *sinfo;
-               unsigned long long min_size = 0;
-               if (st->ss->min_acceptable_spare_size)
-                       min_size = st->ss->min_acceptable_spare_size(st);
-               if (!sst->ss->load_container(sst, sfd, NULL)) {
-                       close(sfd);
-                       sinfo = container_choose_spares(sst, min_size,
-                                                       domains, NULL,
-                                                       st->ss->name, 0);
-                       sst->ss->free_super(sst);
-                       if (sinfo){
-                               int count = 0;
-                               struct mdinfo *disks = sinfo->devs;
-                               while (disks) {
-                                       /* move spare from spare
-                                        * container to currently
-                                        * assembled one
-                                        */
-                                       if (move_spare(
-                                                   smp->path,
-                                                   devname,
-                                                   makedev(disks->disk.major,
-                                                           disks->disk.minor)))
-                                               count++;
-                                       disks = disks->next;
-                               }
-                               if (count)
-                                       pr_err("Added %d spare%s to %s\n",
-                                              count, count>1?"s":"", devname);
-                       }
-                       sysfs_free(sinfo);
-               } else
-                       close(sfd);
-       }
-       domain_free(domains);
-       return 0;
+release:
+       map_free(map);
+       sysfs_free(list);
+       return rv;
 }
 
-static void run_udisks(char *arg1, char *arg2)
+static void remove_from_member_array(struct mdstat_ent *memb,
+                                   struct mddev_dev *devlist, int verbose)
 {
-       int pid = fork();
-       int status;
-       if (pid == 0) {
-               execl("/usr/bin/udisks", "udisks", arg1, arg2, NULL);
-               execl("/bin/udisks", "udisks", arg1, arg2, NULL);
-               exit(1);
+       int subfd = open_dev(memb->devnm);
+
+       if (subfd >= 0) {
+               /*
+                * Ignore the return value because it's not necessary
+                * to handle the failure condition here.
+                */
+               Manage_subdevs(memb->devnm, subfd, devlist, verbose,
+                              0, UOPT_UNDEFINED, 0);
+               close(subfd);
        }
-       while (pid > 0 && wait(&status) != pid)
-               ;
 }
 
 /*
@@ -1707,37 +1691,40 @@ int IncrementalRemove(char *devname, char *id_path, int verbose)
        struct mdstat_ent *ent;
        struct mddev_dev devlist;
        struct mdinfo mdi;
-       char buf[32];
+       char buf[SYSFS_MAX_BUF_SIZE];
 
        if (!id_path)
-               dprintf(Name ": incremental removal without --path <id_path> "
-                       "lacks the possibility to re-add new device in this "
-                       "port\n");
+               dprintf("incremental removal without --path <id_path> lacks the possibility to re-add new device in this port\n");
 
        if (strchr(devname, '/')) {
-               pr_err("incremental removal requires a "
-                       "kernel device name, not a file: %s\n", devname);
+               pr_err("incremental removal requires a kernel device name, not a file: %s\n", devname);
                return 1;
        }
        ent = mdstat_by_component(devname);
        if (!ent) {
                if (verbose >= 0)
-                       pr_err("%s does not appear to be a component "
-                              "of any array\n", devname);
+                       pr_err("%s does not appear to be a component of any array\n", devname);
                return 1;
        }
-       sysfs_init(&mdi, -1, ent->devnm);
-       if (sysfs_get_str(&mdi, NULL, "array_state",
-                         buf, sizeof(buf)) > 0) {
-               if (strncmp(buf, "active", 6) == 0 ||
-                   strncmp(buf, "clean", 5) == 0)
-                       sysfs_set_str(&mdi, NULL,
-                                     "array_state", "read-auto");
+       if (sysfs_init(&mdi, -1, ent->devnm)) {
+               pr_err("unable to initialize sysfs for: %s\n", devname);
+               return 1;
+       }
+       mdfd = open_dev_excl(ent->devnm);
+       if (is_fd_valid(mdfd)) {
+               close_fd(&mdfd);
+               if (sysfs_get_str(&mdi, NULL, "array_state",
+                                 buf, sizeof(buf)) > 0) {
+                       if (strncmp(buf, "active", 6) == 0 ||
+                           strncmp(buf, "clean", 5) == 0)
+                               sysfs_set_str(&mdi, NULL,
+                                             "array_state", "read-auto");
+               }
        }
        mdfd = open_dev(ent->devnm);
        if (mdfd < 0) {
                if (verbose >= 0)
-                       pr_err("Cannot open array %s!!\n", ent->dev);
+                       pr_err("Cannot open array %s!!\n", ent->devnm);
                free_mdstat(ent);
                return 1;
        }
@@ -1752,46 +1739,32 @@ int IncrementalRemove(char *devname, char *id_path, int verbose)
 
        memset(&devlist, 0, sizeof(devlist));
        devlist.devname = devname;
-       devlist.disposition = 'f';
+       devlist.disposition = 'I';
        /* for a container, we must fail each member array */
        if (ent->metadata_version &&
            strncmp(ent->metadata_version, "external:", 9) == 0) {
                struct mdstat_ent *mdstat = mdstat_read(0, 0);
                struct mdstat_ent *memb;
-               for (memb = mdstat ; memb ; memb = memb->next)
-                       if (is_container_member(memb, ent->dev)) {
-                               int subfd = open_dev(memb->devnm);
-                               if (subfd >= 0) {
-                                       rv |= Manage_subdevs(
-                                               memb->dev, subfd,
-                                               &devlist, verbose, 0,
-                                               NULL, 0);
-                                       close(subfd);
-                               }
-                       }
-               free_mdstat(mdstat);
-       } else
-               rv |= Manage_subdevs(ent->dev, mdfd, &devlist,
-                                   verbose, 0, NULL, 0);
-       if (rv & 2) {
-               /* Failed due to EBUSY, try to stop the array.
-                * Give udisks a chance to unmount it first.
-                */
-               int devid = devnm2devid(ent->devnm);
-               run_udisks("--unmount", map_dev(major(devid),minor(devid), 0));
-               rv = Manage_stop(ent->dev, mdfd, verbose, 1);
-               if (rv)
-                       /* At least we can try to trigger a 'remove' */
-                       sysfs_uevent(&mdi, "remove");
-               if (verbose) {
-                       if (rv)
-                               pr_err("Fail to stop %s too.\n", ent->devnm);
+               for (memb = mdstat ; memb ; memb = memb->next) {
+                       if (is_container_member(memb, ent->devnm))
+                               remove_from_member_array(memb,
+                                       &devlist, verbose);
                }
+               free_mdstat(mdstat);
        } else {
-               devlist.disposition = 'r';
-               rv = Manage_subdevs(ent->dev, mdfd, &devlist,
-                                   verbose, 0, NULL, 0);
+               /*
+                * This 'I' incremental remove is a best-effort attempt;
+                * the failure condition can be safely ignored
+                * because of the follow-up 'r' remove.
+                */
+               Manage_subdevs(ent->devnm, mdfd, &devlist,
+                              verbose, 0, UOPT_UNDEFINED, 0);
        }
+
+       devlist.disposition = 'r';
+       rv = Manage_subdevs(ent->devnm, mdfd, &devlist,
+                           verbose, 0, UOPT_UNDEFINED, 0);
+
        close(mdfd);
        free_mdstat(ent);
        return rv;