git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Incremental.c
Allow domain_test to report that no domains were found.
[thirdparty/mdadm.git] / Incremental.c
index 0fef20f5206e3f7472d661befed3b9dd72a7938f..52558e4059d42aa4dd5fd7fb51925055ddaabcfe 100644 (file)
 #include       <dirent.h>
 #include       <ctype.h>
 
-static int count_active(struct supertype *st, int mdfd, char **availp,
+static int count_active(struct supertype *st, struct mdinfo *sra,
+                       int mdfd, char **availp,
                        struct mdinfo *info);
 static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
                        int number, __u64 events, int verbose,
                        char *array_name);
 static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+                    struct map_ent *target,
                     struct supertype *st, int verbose);
 
 static int Incremental_container(struct supertype *st, char *devname,
@@ -92,17 +94,20 @@ int Incremental(char *devname, int verbose, int runstop,
         */
        struct stat stb;
        struct mdinfo info, dinfo;
+       struct mdinfo *sra = NULL, *d;
        struct mddev_ident *match;
        char chosen_name[1024];
        int rv = 1;
        struct map_ent *mp, *map = NULL;
        int dfd = -1, mdfd = -1;
-       char *avail;
+       char *avail = NULL;
        int active_disks;
        int trustworthy;
        char *name_to_use;
        mdu_array_info_t ainf;
        struct dev_policy *policy = NULL;
+       struct map_ent target_array;
+       int have_target;
 
        struct createinfo *ci = conf_get_create_info();
 
@@ -129,7 +134,7 @@ int Incremental(char *devname, int verbose, int runstop,
        if (must_be_container(dfd)) {
                if (!st)
                        st = super_by_fd(dfd, NULL);
-               if (st)
+               if (st && st->ss->load_container)
                        rv = st->ss->load_container(st, dfd, NULL);
 
                close(dfd);
@@ -172,13 +177,16 @@ int Incremental(char *devname, int verbose, int runstop,
        dinfo.disk.minor = minor(stb.st_rdev);
 
        policy = disk_policy(&dinfo);
+       have_target = policy_check_path(&dinfo, &target_array);
 
        if (st == NULL && (st = guess_super(dfd)) == NULL) {
                if (verbose >= 0)
                        fprintf(stderr, Name
                                ": no recognisable superblock on %s.\n",
                                devname);
-               rv = try_spare(devname, &dfd, policy, st, verbose);
+               rv = try_spare(devname, &dfd, policy,
+                              have_target ? &target_array : NULL,
+                              st, verbose);
                goto out;
        }
        if (st->ss->compare_super == NULL ||
@@ -186,7 +194,9 @@ int Incremental(char *devname, int verbose, int runstop,
                if (verbose >= 0)
                        fprintf(stderr, Name ": no RAID superblock on %s.\n",
                                devname);
-               rv = try_spare(devname, &dfd, policy, st, verbose);
+               rv = try_spare(devname, &dfd, policy,
+                              have_target ? &target_array : NULL,
+                              st, verbose);
                free(st);
                goto out;
        }
@@ -275,7 +285,6 @@ int Incremental(char *devname, int verbose, int runstop,
                mdfd = -1;
 
        if (mdfd < 0) {
-               struct mdinfo *sra;
 
                /* Couldn't find an existing array, maybe make a new one */
                mdfd = create_mddev(match ? match->devname : NULL,
@@ -303,7 +312,9 @@ int Incremental(char *devname, int verbose, int runstop,
                        rv = 2;
                        goto out;
                }
-               sra = sysfs_read(mdfd, fd2devnum(mdfd), GET_DEVS);
+               sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE |
+                                           GET_OFFSET | GET_SIZE));
+       
                if (!sra || !sra->devs || sra->devs->disk.raid_disk >= 0) {
                        /* It really should be 'none' - must be old buggy
                         * kernel, and mdadm -I may not be able to complete.
@@ -318,7 +329,6 @@ int Incremental(char *devname, int verbose, int runstop,
                        goto out;
                }
                info.array.working_disks = 1;
-               sysfs_free(sra);
                /* 6/ Make sure /var/run/mdadm.map contains this array. */
                map_update(&map, fd2devnum(mdfd),
                           info.text_version,
@@ -331,10 +341,12 @@ int Incremental(char *devname, int verbose, int runstop,
                char dn[20];
                int dfd2;
                int err;
-               struct mdinfo *sra;
                struct supertype *st2;
                struct mdinfo info2, *d;
 
+               sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE |
+                                           GET_OFFSET | GET_SIZE));
+       
                if (mp->path)
                        strcpy(chosen_name, mp->path);
                else
@@ -346,22 +358,17 @@ int Incremental(char *devname, int verbose, int runstop,
                 * array was possibly started early and our best bet is
                 * to add this anyway.
                 * Also if action policy is re-add or better we allow
-                * re-add
+                * re-add.
+                * This doesn't apply to containers as the 'non-spare'
+                * flag has a different meaning.  The test has to happen
+                * at the device level there
                 */
-               if ((info.disk.state & (1<<MD_DISK_SYNC)) != 0
+               if (!st->ss->external
+                   && (info.disk.state & (1<<MD_DISK_SYNC)) != 0
                    && ! policy_action_allows(policy, st->ss->name,
                                              act_re_add)
                    && runstop < 1) {
-                       int active = 0;
-                       
-                       if (st->ss->external) {
-                               char *devname = devnum2devname(fd2devnum(mdfd));
-
-                               active = devname && is_container_active(devname);
-                               free(devname);
-                       } else if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0)
-                               active = 1;
-                       if (active) {
+                       if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) {
                                fprintf(stderr, Name
                                        ": not adding %s to active array (without --run) %s\n",
                                        devname, chosen_name);
@@ -369,7 +376,6 @@ int Incremental(char *devname, int verbose, int runstop,
                                goto out;
                        }
                }
-               sra = sysfs_read(mdfd, fd2devnum(mdfd), (GET_DEVS | GET_STATE));
                if (!sra) {
                        rv = 2;
                        goto out;
@@ -432,6 +438,7 @@ int Incremental(char *devname, int verbose, int runstop,
        /* 7/ Is there enough devices to possibly start the array? */
        /* 7a/ if not, finish with success. */
        if (info.array.level == LEVEL_CONTAINER) {
+               char *devname = NULL;
                /* Try to assemble within the container */
                map_unlock(&map);
                sysfs_uevent(&info, "change");
@@ -440,7 +447,10 @@ int Incremental(char *devname, int verbose, int runstop,
                                ": container %s now has %d devices\n",
                                chosen_name, info.array.working_disks);
                wait_for(chosen_name, mdfd);
+               if (st->ss->external)
+                       devname = devnum2devname(fd2devnum(mdfd));
                close(mdfd);
+               sysfs_free(sra);
                rv = Incremental(chosen_name, verbose, runstop,
                                 NULL, homehost, require_homehost, autof);
                if (rv == 1)
@@ -448,14 +458,26 @@ int Incremental(char *devname, int verbose, int runstop,
                         * have enough devices to start yet
                         */
                        rv = 0;
+               /* After a spare is added, ping the monitor for external metadata
+                * so that it can, e.g., try to rebuild a degraded array. */
+               if (st->ss->external) {
+                       ping_monitor(devname);
+                       free(devname);
+               }
                return rv;
        }
-       avail = NULL;
-       active_disks = count_active(st, mdfd, &avail, &info);
+
+       /* We have added something to the array, so need to re-read the
+        * state.  Eventually this state should be kept up-to-date as
+        * things change.
+        */
+       sysfs_free(sra);
+       sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE |
+                                   GET_OFFSET | GET_SIZE));
+       active_disks = count_active(st, sra, mdfd, &avail, &info);
        if (enough(info.array.level, info.array.raid_disks,
                   info.array.layout, info.array.state & 1,
                   avail, active_disks) == 0) {
-               free(avail);
                if (verbose >= 0)
                        fprintf(stderr, Name
                             ": %s attached to %s, not enough to start (%d).\n",
@@ -464,7 +486,6 @@ int Incremental(char *devname, int verbose, int runstop,
                rv = 0;
                goto out;
        }
-       free(avail);
 
        /* 7b/ if yes, */
        /* - if number of OK devices match expected, or -R and there */
@@ -484,7 +505,7 @@ int Incremental(char *devname, int verbose, int runstop,
 
        map_unlock(&map);
        if (runstop > 0 || active_disks >= info.array.working_disks) {
-               struct mdinfo *sra, *dsk;
+               struct mdinfo *dsk;
                /* Let's try to start it */
                if (match && match->bitmap_file) {
                        int bmfd = open(match->bitmap_file, O_RDWR);
@@ -503,9 +524,13 @@ int Incremental(char *devname, int verbose, int runstop,
                        }
                        close(bmfd);
                }
-               /* GET_* needed so add_disk works below */
-               sra = sysfs_read(mdfd, fd2devnum(mdfd),
-                                GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE);
+               /* Need to remove from the array any devices which
+                * 'count_active' discerned were too old or inappropriate
+                */
+               for (d = sra ? sra->devs : NULL ; d ; d = d->next)
+                       if (d->disk.state & (1<<MD_DISK_REMOVED))
+                               remove_disk(mdfd, st, sra, d);
+
                if ((sra == NULL || active_disks >= info.array.working_disks)
                    && trustworthy != FOREIGN)
                        rv = ioctl(mdfd, RUN_ARRAY, NULL);
@@ -546,12 +571,15 @@ int Incremental(char *devname, int verbose, int runstop,
                rv = 0;
        }
 out:
+       free(avail);
        if (dfd >= 0)
                close(dfd);
        if (mdfd >= 0)
                close(mdfd);
        if (policy)
                dev_policy_free(policy);
+       if (sra)
+               sysfs_free(sra);
        return rv;
 }
 
@@ -672,20 +700,28 @@ static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
        }
 }
 
-static int count_active(struct supertype *st, int mdfd, char **availp,
+static int count_active(struct supertype *st, struct mdinfo *sra,
+                       int mdfd, char **availp,
                        struct mdinfo *bestinfo)
 {
        /* count how many devices in sra think they are active */
        struct mdinfo *d;
-       int cnt = 0, cnt1 = 0;
+       int cnt = 0;
        __u64 max_events = 0;
-       struct mdinfo *sra = sysfs_read(mdfd, -1, GET_DEVS | GET_STATE);
        char *avail = NULL;
+       int *best;
+       char *devmap = NULL;
+       int numdevs = 0;
+       int devnum;
+       int b, i;
+       int raid_disks = 0;
 
        if (!sra)
                return 0;
 
-       for (d = sra->devs ; d ; d = d->next) {
+       for (d = sra->devs ; d ; d = d->next)
+               numdevs++;
+       for (d = sra->devs, devnum=0 ; d ; d = d->next, devnum++) {
                char dn[30];
                int dfd;
                int ok;
@@ -699,15 +735,21 @@ static int count_active(struct supertype *st, int mdfd, char **availp,
                close(dfd);
                if (ok != 0)
                        continue;
-               st->ss->getinfo_super(st, &info, NULL);
+               info.array.raid_disks = raid_disks;
+               st->ss->getinfo_super(st, &info, devmap + raid_disks * devnum);
                if (!avail) {
-                       avail = malloc(info.array.raid_disks);
+                       raid_disks = info.array.raid_disks;
+                       avail = calloc(raid_disks, 1);
                        if (!avail) {
                                fprintf(stderr, Name ": out of memory.\n");
                                exit(1);
                        }
-                       memset(avail, 0, info.array.raid_disks);
                        *availp = avail;
+
+                       best = calloc(raid_disks, sizeof(int));
+                       devmap = calloc(raid_disks * numdevs, 1);
+
+                       st->ss->getinfo_super(st, &info, devmap);
                }
 
                if (info.disk.state & (1<<MD_DISK_SYNC))
@@ -716,27 +758,28 @@ static int count_active(struct supertype *st, int mdfd, char **availp,
                                cnt++;
                                max_events = info.events;
                                avail[info.disk.raid_disk] = 2;
+                               best[info.disk.raid_disk] = devnum;
                                st->ss->getinfo_super(st, bestinfo, NULL);
                        } else if (info.events == max_events) {
-                               cnt++;
                                avail[info.disk.raid_disk] = 2;
+                               best[info.disk.raid_disk] = devnum;
                        } else if (info.events == max_events-1) {
-                               cnt1++;
-                               avail[info.disk.raid_disk] = 1;
+                               if (avail[info.disk.raid_disk] == 0) {
+                                       avail[info.disk.raid_disk] = 1;
+                                       best[info.disk.raid_disk] = devnum;
+                               }
                        } else if (info.events < max_events - 1)
                                ;
                        else if (info.events == max_events+1) {
                                int i;
-                               cnt1 = cnt;
-                               cnt = 1;
                                max_events = info.events;
-                               for (i=0; i<info.array.raid_disks; i++)
+                               for (i=0; i < raid_disks; i++)
                                        if (avail[i])
                                                avail[i]--;
                                avail[info.disk.raid_disk] = 2;
+                               best[info.disk.raid_disk] = devnum;
                                st->ss->getinfo_super(st, bestinfo, NULL);
                        } else { /* info.events much bigger */
-                               cnt = 1; cnt1 = 0;
                                memset(avail, 0, info.disk.raid_disk);
                                max_events = info.events;
                                avail[info.disk.raid_disk] = 2;
@@ -745,19 +788,74 @@ static int count_active(struct supertype *st, int mdfd, char **availp,
                }
                st->ss->free_super(st);
        }
-       return cnt + cnt1;
+       if (!avail)
+               return 0;
+       /* We need to reject any device that thinks the best device is
+        * failed or missing */
+       for (b = 0; b < raid_disks; b++)
+               if (avail[b] == 2)
+                       break;
+       cnt = 0;
+       for (i = 0 ; i < raid_disks ; i++) {
+               if (i != b && avail[i])
+                       if (devmap[raid_disks * best[i] + b] == 0) {
+                               /* This device thinks 'b' is failed -
+                                * don't use it */
+                               devnum = best[i];
+                               for (d=sra->devs ; devnum; d = d->next)
+                                       devnum--;
+                               d->disk.state |= (1 << MD_DISK_REMOVED);
+                               avail[i] = 0;
+                       }
+               if (avail[i])
+                       cnt++;
+       }
+       free(best);
+       free(devmap);
+       return cnt;
+}
+
+/* return the highest degradation (raid_disks - active_disks - spare_disks) among member arrays of container 'me'; 0 if none */
+static int container_members_max_degradation(struct map_ent *map, struct map_ent *me)
+{
+       mdu_array_info_t array;
+       int afd;
+       int max_degraded = 0;
+
+       for(; map; map = map->next) {
+               if (!is_subarray(map->metadata) ||
+                   devname2devnum(map->metadata+1) != me->devnum)
+                       continue;
+               afd = open_dev(map->devnum);
+               if (afd < 0)
+                       continue;
+               /* most accurate information regarding array degradation */
+               if (ioctl(afd, GET_ARRAY_INFO, &array) >= 0) {
+                       int degraded = array.raid_disks - array.active_disks -
+                                      array.spare_disks;
+                       if (degraded > max_degraded)
+                               max_degraded = degraded;
+               }
+               close(afd);
+       }
+       return (max_degraded);
 }
 
 static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+                          struct map_ent *target, int bare,
                           struct supertype *st, int verbose)
 {
        /* This device doesn't have any md metadata
-        * If it is 'bare' and theh device policy allows 'spare' look for
-        * an array or container to attach it to.
+        * The device policy allows 'spare' and if !bare, it allows spare-same-slot.
+        * If 'st' is not set, then we only know that some metadata allows this,
+        * others possibly don't.
+        * So look for a container or array to attach the device to.
+        * Prefer 'target' if that is set and the array is found.
+        *
         * If st is set, then only arrays of that type are considered
         * Return 0 on success, or some exit code on failure, probably 1.
         */
-       int rv = -1;
+       int rv = 1;
        struct stat stb;
        struct map_ent *mp, *map = NULL;
        struct mdinfo *chosen = NULL;
@@ -785,6 +883,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                struct domainlist *dl = NULL;
                struct mdinfo *sra;
                unsigned long long devsize;
+               unsigned long long component_size;
 
                if (is_subarray(mp->metadata))
                        continue;
@@ -810,7 +909,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                                         GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
                                         GET_COMPONENT|GET_VERSION);
                        if (sra)
-                               sra->array.failed_disks = 0;
+                               sra->array.failed_disks = -1;
                }
                if (!sra)
                        continue;
@@ -820,20 +919,87 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                        for(i=0; !st2 && superlist[i]; i++)
                                st2 = superlist[i]->match_metadata_desc(
                                        sra->text_version);
+                       if (!st2) {
+                               if (verbose > 1)
+                                       fprintf(stderr, Name ": not adding %s to %s"
+                                               " as metadata not recognised.\n",
+                                               devname, mp->path);
+                               goto next;
+                       }
+                       /* Need to double check the 'act_spare' permissions applies
+                        * to this metadata.
+                        */
+                       if (!policy_action_allows(pol, st2->ss->name, act_spare))
+                               goto next;
+                       if (!bare && !policy_action_allows(pol, st2->ss->name,
+                                                          act_spare_same_slot))
+                               goto next;
                } else
                        st2 = st;
+               /* update number of failed disks to that of the most
+                * degraded container member */
+               if (sra->array.failed_disks == -1)
+                       sra->array.failed_disks = container_members_max_degradation(map, mp);
+
                get_dev_size(dfd, NULL, &devsize);
-               if (st2->ss->avail_size(st2, devsize) < sra->component_size) {
+               if (sra->component_size == 0) {
+                       /* true for containers, here we must read superblock
+                        * to obtain minimum spare size */
+                       struct supertype *st3 = dup_super(st2);
+                       int mdfd = open_dev(mp->devnum);
+                       if (!mdfd)
+                               goto next;
+                       if (st3->ss->load_container &&
+                           !st3->ss->load_container(st3, mdfd, mp->path)) {
+                               component_size = st3->ss->min_acceptable_spare_size(st3);
+                               st3->ss->free_super(st3);
+                       }
+                       free(st3);
+                       close(mdfd);
+               }
+               if ((sra->component_size > 0 &&
+                    st2->ss->avail_size(st2, devsize) < sra->component_size)
+                   ||
+                   (sra->component_size == 0 && devsize < component_size)) {
                        if (verbose > 1)
                                fprintf(stderr, Name ": not adding %s to %s as it is too small\n",
                                        devname, mp->path);
                        goto next;
                }
+               /* test against target.
+                * If 'target' is set and 'bare' is false, we only accept
+                * arrays/containers that match 'target'.
+                * If 'target' is set and 'bare' is true, we prefer the
+                * array which matches 'target'.
+                * target is considered only if we deal with degraded array
+                */
+               if (target && policy_action_allows(pol, st2->ss->name,
+                                                  act_spare_same_slot)) {
+                       if (strcmp(target->metadata, mp->metadata) == 0 &&
+                           memcmp(target->uuid, mp->uuid,
+                                  sizeof(target->uuid)) == 0 &&
+                           sra->array.failed_disks > 0) {
+                               /* This is our target!! */
+                               if (chosen)
+                                       sysfs_free(chosen);
+                               chosen = sra;
+                               sra = NULL;
+                               /* skip to end so we don't check any more */
+                               while (mp->next)
+                                       mp = mp->next;
+                               goto next;
+                       }
+                       /* not our target */
+                       if (!bare)
+                               goto next;
+               }
+
                dl = domain_from_array(sra, st2->ss->name);
-               if (!domain_test(dl, pol, st2->ss->name)) {
+               if (domain_test(dl, pol, st2->ss->name) != 1) {
                        /* domain test fails */
                        if (verbose > 1)
-                               fprintf(stderr, Name ": not adding %s to %s as it is not in a compatible domain\n",
+                               fprintf(stderr, Name ": not adding %s to %s as"
+                                       " it is not in a compatible domain\n",
                                        devname, mp->path);
 
                        goto next;
@@ -872,7 +1038,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                        close(dfd);
                        *dfdp = -1;
                        rv =  Manage_subdevs(chosen->sys_name, mdfd, &devlist,
-                                            -1, 0);
+                                            -1, 0, NULL);
                        close(mdfd);
                }
                if (verbose > 0) {
@@ -885,7 +1051,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                }
                sysfs_free(chosen);
        }
-       return rv ? 0 : 1;
+       return rv;
 }
 
 static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
@@ -945,7 +1111,7 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                pol2 = path_policy(de->d_name, type_disk);
 
                domain_merge(&domlist, pol2, st ? st->ss->name : NULL);
-               if (domain_test(domlist, pol, st ? st->ss->name : NULL) == 0)
+               if (domain_test(domlist, pol, st ? st->ss->name : NULL) != 1)
                        /* new device is incompatible with this device. */
                        goto next;
 
@@ -971,9 +1137,14 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                if (!st) {
                        /* Check domain policy again, this time referring to metadata */
                        domain_merge(&domlist, pol2, st2->ss->name);
-                       if (domain_test(domlist, pol, st2->ss->name) == 0)
+                       if (domain_test(domlist, pol, st2->ss->name) != 1)
                                /* Incompatible devices for this metadata type */
                                goto next;
+                       if (!policy_action_allows(pol, st2->ss->name, act_spare))
+                               /* Some partition types allow sparing, but not
+                                * this one.
+                                */
+                               goto next;
                }
 
                st2->ss->getinfo_super(st2, &info, NULL);
@@ -1030,6 +1201,34 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
        return 0;
 }
 
+static int is_bare(int dfd)
+{
+       unsigned long long size = 0;
+       char bufpad[4096 + 4096];
+       char *buf = (char*)(((long)bufpad + 4096) & ~4095);
+
+       if (lseek(dfd, 0, SEEK_SET) != 0 ||
+           read(dfd, buf, 4096) != 4096)
+               return 0;
+
+       if (buf[0] != '\0' && buf[0] != '\x5a' && buf[0] != '\xff')
+               return 0;
+       if (memcmp(buf, buf+1, 4095) != 0)
+               return 0;
+
+       /* OK, first 4K appear blank, try the end. */
+       get_dev_size(dfd, NULL, &size);
+       if (lseek(dfd, size-4096, SEEK_SET) < 0 ||
+           read(dfd, buf, 4096) != 4096)
+               return 0;
+
+       if (buf[0] != '\0' && buf[0] != '\x5a' && buf[0] != '\xff')
+               return 0;
+       if (memcmp(buf, buf+1, 4095) != 0)
+               return 0;
+
+       return 1;
+}
 
 /* adding a spare to a regular array is quite different from adding one to
  * a set-of-partitions virtual array.
@@ -1037,63 +1236,63 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
  * Arrays are given priority over partitions.
  */
 static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+                    struct map_ent *target,
                     struct supertype *st, int verbose)
 {
        int i;
        int rv;
        int arrays_ok = 0;
        int partitions_ok = 0;
-       char bufpad[4096 + 4096];
-       char *buf = (char*)(((long)bufpad + 4096) & ~4095);
        int dfd = *dfdp;
+       int bare;
 
-       /* Can only add a spare if device has at least one domains */
+       /* Can only add a spare if device has at least one domain */
        if (pol_find(pol, pol_domain) == NULL)
                return 1;
        /* And only if some action allows spares */
        if (!policy_action_allows(pol, st?st->ss->name:NULL, act_spare))
                return 1;
 
-       /* Now check if the device is bare - we don't add non-bare devices
-        * yet even if action=-spare
+       /* Now check if the device is bare.
+        * bare devices can always be added as a spare
+        * non-bare devices can only be added if spare-same-slot is permitted,
+        * and this device is replacing a previous device - in which case 'target'
+        * will be set.
         */
+       if (!is_bare(dfd)) {
+               /* Must have a target and allow same_slot */
+               /* Later - may allow force_spare without target */
+               if (!target ||
+                   !policy_action_allows(pol, st?st->ss->name:NULL,
+                                         act_spare_same_slot)) {
+                       if (verbose > 1)
+                               fprintf(stderr, Name ": %s is not bare, so not "
+                                       "considering as a spare\n",
+                                       devname);
+                       return 1;
+               }
+               bare = 0;
+       } else
+               bare = 1;
 
-       if (lseek(dfd, 0, SEEK_SET) != 0 ||
-           read(dfd, buf, 4096) != 4096) {
-       not_bare:
-               if (verbose > 1)
-                       fprintf(stderr, Name ": %s is not bare, so not considering as a spare\n",
-                               devname);
-               return 1;
-       }
-       if (buf[0] != '\0' && buf[0] != '\x5a' && buf[0] != '\xff')
-               goto not_bare;
-       if (memcmp(buf, buf+1, 4095) != 0)
-               goto not_bare;
-
-       /* OK, first 4K appear blank, try the end. */
-       if (lseek(dfd, -4096, SEEK_END) < 0 ||
-           read(dfd, buf, 4096) != 4096)
-               goto not_bare;
-
-       if (buf[0] != '\0' && buf[0] != '\x5a' && buf[0] != '\xff')
-               goto not_bare;
-       if (memcmp(buf, buf+1, 4095) != 0)
-               goto not_bare;
-
-       /* This device passes our test for 'is bare'.
-        * Let's see what policy allows for such things.
+       /* It might be OK to add this device to an array - need to see
+        * what arrays might be candidates.
         */
        if (st) {
                /* just try try 'array' or 'partition' based on this metadata */
                if (st->ss->add_to_super)
-                       return array_try_spare(devname, dfdp, pol,
+                       return array_try_spare(devname, dfdp, pol, target, bare,
                                               st, verbose);
                else
                        return partition_try_spare(devname, dfdp, pol,
                                                   st, verbose);
        }
-       /* Now see which metadata type support spare */
+       /* No metadata was specified or found so options are open.
+        * Check for whether any array metadata, or any partition metadata
+        * might allow adding the spare.  This check just helps to avoid
+        * a more costly scan of all arrays when we can be sure that will
+        * fail.
+        */
        for (i = 0; (!arrays_ok || !partitions_ok) && superlist[i] ; i++) {
                if (superlist[i]->add_to_super && !arrays_ok &&
                    policy_action_allows(pol, superlist[i]->name, act_spare))
@@ -1102,10 +1301,11 @@ static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                    policy_action_allows(pol, superlist[i]->name, act_spare))
                        partitions_ok = 1;
        }
-       rv = 0;
+       rv = 1;
        if (arrays_ok)
-               rv = array_try_spare(devname, dfdp, pol, st, verbose);
-       if (rv == 0 && partitions_ok)
+               rv = array_try_spare(devname, dfdp, pol, target, bare,
+                                    st, verbose);
+       if (rv != 0 && partitions_ok)
                rv = partition_try_spare(devname, dfdp, pol, st, verbose);
        return rv;
 }
@@ -1229,6 +1429,10 @@ static int Incremental_container(struct supertype *st, char *devname,
        int trustworthy;
        struct mddev_ident *match;
        int rv = 0;
+       struct domainlist *domains;
+       struct map_ent *smp;
+       int suuid[4];
+       int sfd;
 
        memset(&info, 0, sizeof(info));
        st->ss->getinfo_super(st, &info, NULL);
@@ -1260,6 +1464,12 @@ static int Incremental_container(struct supertype *st, char *devname,
        if (map_lock(&map))
                fprintf(stderr, Name ": failed to get exclusive lock on "
                        "mapfile\n");
+       /* do not assemble arrays that might have bad blocks */
+       if (list->array.state & (1<<MD_SB_BBM_ERRORS)) {
+               fprintf(stderr, Name ": BBM log found in metadata. "
+                                       "Cannot activate array(s).\n");
+               list = NULL;
+       }
 
        for (ra = list ; ra ; ra = ra->next) {
                int mdfd;
@@ -1342,6 +1552,52 @@ static int Incremental_container(struct supertype *st, char *devname,
                assemble_container_content(st, mdfd, ra, runstop,
                                           chosen_name, verbose);
        }
+
+       /* Now move all suitable spares from spare container */
+       domains = domain_from_array(list, st->ss->name);
+       memcpy(suuid, uuid_zero, sizeof(int[4]));
+       if (domains &&
+           (smp = map_by_uuid(&map, suuid)) != NULL &&
+           (sfd = open(smp->path, O_RDONLY)) >= 0) {
+               /* spare container found */
+               struct supertype *sst =
+                       super_imsm.match_metadata_desc("imsm");
+               struct mdinfo *sinfo;
+               unsigned long long min_size = 0;
+               if (st->ss->min_acceptable_spare_size)
+                       min_size = st->ss->min_acceptable_spare_size(st);
+               if (!sst->ss->load_container(sst, sfd, NULL)) {
+                       close(sfd);
+                       sinfo = container_choose_spares(sst, min_size,
+                                                       domains, NULL,
+                                                       st->ss->name, 0);
+                       sst->ss->free_super(sst);
+                       if (sinfo){
+                               int count = 0;
+                               struct mdinfo *disks = sinfo->devs;
+                               while (disks) {
+                                       /* move spare from spare
+                                        * container to currently
+                                        * assembled one
+                                        */
+                                       if (move_spare(
+                                                   smp->path,
+                                                   devname,
+                                                   makedev(disks->disk.major,
+                                                           disks->disk.minor)))
+                                               count++;
+                                       disks = disks->next;
+                               }
+                               if (count)
+                                       fprintf(stderr, Name
+                                               ": Added %d spare%s to %s\n",
+                                               count, count>1?"s":"", devname);
+                       }
+                       sysfs_free(sinfo);
+               } else
+                       close(sfd);
+       }
+       domain_free(domains);
        map_unlock(&map);
        return 0;
 }
@@ -1385,6 +1641,15 @@ int IncrementalRemove(char *devname, char *id_path, int verbose)
                free_mdstat(ent);
                return 1;
        }
+
+       if (id_path) {
+               struct map_ent *map = NULL, *me;
+               me = map_by_devnum(&map, ent->devnum);
+               if (me)
+                       policy_save_path(id_path, me);
+               map_free(map);
+       }
+
        memset(&devlist, 0, sizeof(devlist));
        devlist.devname = devname;
        devlist.disposition = 'f';
@@ -1398,15 +1663,16 @@ int IncrementalRemove(char *devname, char *id_path, int verbose)
                                int subfd = open_dev(memb->devnum);
                                if (subfd >= 0) {
                                        Manage_subdevs(memb->dev, subfd,
-                                                      &devlist, verbose, 0);
+                                                      &devlist, verbose, 0,
+                                                      NULL);
                                        close(subfd);
                                }
                        }
                free_mdstat(mdstat);
        } else
-               Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0);
+               Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL);
        devlist.disposition = 'r';
-       rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0);
+       rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL);
        close(mdfd);
        free_mdstat(ent);
        return rv;