git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Incremental.c
Incremental - avoid including wayward devices.
[thirdparty/mdadm.git] / Incremental.c
index 3e361d0ab60b02566c069b3a9de4a10c0da1df40..8cae1ee5cb9f9bf54490adc9993117e41de81cf8 100644 (file)
 #include       <dirent.h>
 #include       <ctype.h>
 
-static int count_active(struct supertype *st, int mdfd, char **availp,
+static int count_active(struct supertype *st, struct mdinfo *sra,
+                       int mdfd, char **availp,
                        struct mdinfo *info);
 static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
                        int number, __u64 events, int verbose,
                        char *array_name);
 static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+                    struct map_ent *target,
                     struct supertype *st, int verbose);
 
 static int Incremental_container(struct supertype *st, char *devname,
@@ -92,17 +94,20 @@ int Incremental(char *devname, int verbose, int runstop,
         */
        struct stat stb;
        struct mdinfo info, dinfo;
+       struct mdinfo *sra = NULL, *d;
        struct mddev_ident *match;
        char chosen_name[1024];
        int rv = 1;
        struct map_ent *mp, *map = NULL;
        int dfd = -1, mdfd = -1;
-       char *avail;
+       char *avail = NULL;
        int active_disks;
        int trustworthy;
        char *name_to_use;
        mdu_array_info_t ainf;
        struct dev_policy *policy = NULL;
+       struct map_ent target_array;
+       int have_target;
 
        struct createinfo *ci = conf_get_create_info();
 
@@ -172,13 +177,16 @@ int Incremental(char *devname, int verbose, int runstop,
        dinfo.disk.minor = minor(stb.st_rdev);
 
        policy = disk_policy(&dinfo);
+       have_target = policy_check_path(&dinfo, &target_array);
 
        if (st == NULL && (st = guess_super(dfd)) == NULL) {
                if (verbose >= 0)
                        fprintf(stderr, Name
                                ": no recognisable superblock on %s.\n",
                                devname);
-               rv = try_spare(devname, &dfd, policy, st, verbose);
+               rv = try_spare(devname, &dfd, policy,
+                              have_target ? &target_array : NULL,
+                              st, verbose);
                goto out;
        }
        if (st->ss->compare_super == NULL ||
@@ -186,7 +194,9 @@ int Incremental(char *devname, int verbose, int runstop,
                if (verbose >= 0)
                        fprintf(stderr, Name ": no RAID superblock on %s.\n",
                                devname);
-               rv = try_spare(devname, &dfd, policy, st, verbose);
+               rv = try_spare(devname, &dfd, policy,
+                              have_target ? &target_array : NULL,
+                              st, verbose);
                free(st);
                goto out;
        }
@@ -275,7 +285,6 @@ int Incremental(char *devname, int verbose, int runstop,
                mdfd = -1;
 
        if (mdfd < 0) {
-               struct mdinfo *sra;
 
                /* Couldn't find an existing array, maybe make a new one */
                mdfd = create_mddev(match ? match->devname : NULL,
@@ -303,7 +312,9 @@ int Incremental(char *devname, int verbose, int runstop,
                        rv = 2;
                        goto out;
                }
-               sra = sysfs_read(mdfd, fd2devnum(mdfd), GET_DEVS);
+               sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE |
+                                           GET_OFFSET | GET_SIZE));
+       
                if (!sra || !sra->devs || sra->devs->disk.raid_disk >= 0) {
                        /* It really should be 'none' - must be old buggy
                         * kernel, and mdadm -I may not be able to complete.
@@ -318,7 +329,6 @@ int Incremental(char *devname, int verbose, int runstop,
                        goto out;
                }
                info.array.working_disks = 1;
-               sysfs_free(sra);
                /* 6/ Make sure /var/run/mdadm.map contains this array. */
                map_update(&map, fd2devnum(mdfd),
                           info.text_version,
@@ -331,10 +341,12 @@ int Incremental(char *devname, int verbose, int runstop,
                char dn[20];
                int dfd2;
                int err;
-               struct mdinfo *sra;
                struct supertype *st2;
                struct mdinfo info2, *d;
 
+               sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE |
+                                           GET_OFFSET | GET_SIZE));
+       
                if (mp->path)
                        strcpy(chosen_name, mp->path);
                else
@@ -369,7 +381,6 @@ int Incremental(char *devname, int verbose, int runstop,
                                goto out;
                        }
                }
-               sra = sysfs_read(mdfd, fd2devnum(mdfd), (GET_DEVS | GET_STATE));
                if (!sra) {
                        rv = 2;
                        goto out;
@@ -441,6 +452,7 @@ int Incremental(char *devname, int verbose, int runstop,
                                chosen_name, info.array.working_disks);
                wait_for(chosen_name, mdfd);
                close(mdfd);
+               sysfs_free(sra);
                rv = Incremental(chosen_name, verbose, runstop,
                                 NULL, homehost, require_homehost, autof);
                if (rv == 1)
@@ -450,12 +462,18 @@ int Incremental(char *devname, int verbose, int runstop,
                        rv = 0;
                return rv;
        }
-       avail = NULL;
-       active_disks = count_active(st, mdfd, &avail, &info);
+
+       /* We have added something to the array, so need to re-read the
+        * state.  Eventually this state should be kept up-to-date as
+        * things change.
+        */
+       sysfs_free(sra);
+       sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE |
+                                   GET_OFFSET | GET_SIZE));
+       active_disks = count_active(st, sra, mdfd, &avail, &info);
        if (enough(info.array.level, info.array.raid_disks,
                   info.array.layout, info.array.state & 1,
                   avail, active_disks) == 0) {
-               free(avail);
                if (verbose >= 0)
                        fprintf(stderr, Name
                             ": %s attached to %s, not enough to start (%d).\n",
@@ -464,7 +482,6 @@ int Incremental(char *devname, int verbose, int runstop,
                rv = 0;
                goto out;
        }
-       free(avail);
 
        /* 7b/ if yes, */
        /* - if number of OK devices match expected, or -R and there */
@@ -484,7 +501,7 @@ int Incremental(char *devname, int verbose, int runstop,
 
        map_unlock(&map);
        if (runstop > 0 || active_disks >= info.array.working_disks) {
-               struct mdinfo *sra, *dsk;
+               struct mdinfo *dsk;
                /* Let's try to start it */
                if (match && match->bitmap_file) {
                        int bmfd = open(match->bitmap_file, O_RDWR);
@@ -503,9 +520,13 @@ int Incremental(char *devname, int verbose, int runstop,
                        }
                        close(bmfd);
                }
-               /* GET_* needed so add_disk works below */
-               sra = sysfs_read(mdfd, fd2devnum(mdfd),
-                                GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE);
+               /* Need to remove from the array any devices which
+                * 'count_active' discerned were too old or inappropriate
+                */
+               for (d = sra ? sra->devs : NULL ; d ; d = d->next)
+                       if (d->disk.state & (1<<MD_DISK_REMOVED))
+                               remove_disk(mdfd, st, sra, d);
+
                if ((sra == NULL || active_disks >= info.array.working_disks)
                    && trustworthy != FOREIGN)
                        rv = ioctl(mdfd, RUN_ARRAY, NULL);
@@ -546,12 +567,15 @@ int Incremental(char *devname, int verbose, int runstop,
                rv = 0;
        }
 out:
+       free(avail);
        if (dfd >= 0)
                close(dfd);
        if (mdfd >= 0)
                close(mdfd);
        if (policy)
                dev_policy_free(policy);
+       if (sra)
+               sysfs_free(sra);
        return rv;
 }
 
@@ -672,20 +696,28 @@ static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
        }
 }
 
-static int count_active(struct supertype *st, int mdfd, char **availp,
+static int count_active(struct supertype *st, struct mdinfo *sra,
+                       int mdfd, char **availp,
                        struct mdinfo *bestinfo)
 {
        /* count how many devices in sra think they are active */
        struct mdinfo *d;
-       int cnt = 0, cnt1 = 0;
+       int cnt = 0;
        __u64 max_events = 0;
-       struct mdinfo *sra = sysfs_read(mdfd, -1, GET_DEVS | GET_STATE);
        char *avail = NULL;
+       int *best;
+       char *devmap = NULL;
+       int numdevs = 0;
+       int devnum;
+       int b, i;
+       int raid_disks = 0;
 
        if (!sra)
                return 0;
 
-       for (d = sra->devs ; d ; d = d->next) {
+       for (d = sra->devs ; d ; d = d->next)
+               numdevs++;
+       for (d = sra->devs, devnum=0 ; d ; d = d->next, devnum++) {
                char dn[30];
                int dfd;
                int ok;
@@ -699,15 +731,21 @@ static int count_active(struct supertype *st, int mdfd, char **availp,
                close(dfd);
                if (ok != 0)
                        continue;
-               st->ss->getinfo_super(st, &info, NULL);
+               info.array.raid_disks = raid_disks;
+               st->ss->getinfo_super(st, &info, devmap + raid_disks * devnum);
                if (!avail) {
-                       avail = malloc(info.array.raid_disks);
+                       raid_disks = info.array.raid_disks;
+                       avail = calloc(raid_disks, 1);
                        if (!avail) {
                                fprintf(stderr, Name ": out of memory.\n");
                                exit(1);
                        }
-                       memset(avail, 0, info.array.raid_disks);
                        *availp = avail;
+
+                       best = calloc(raid_disks, sizeof(int));
+                       devmap = calloc(raid_disks * numdevs, 1);
+
+                       st->ss->getinfo_super(st, &info, devmap);
                }
 
                if (info.disk.state & (1<<MD_DISK_SYNC))
@@ -716,27 +754,28 @@ static int count_active(struct supertype *st, int mdfd, char **availp,
                                cnt++;
                                max_events = info.events;
                                avail[info.disk.raid_disk] = 2;
+                               best[info.disk.raid_disk] = devnum;
                                st->ss->getinfo_super(st, bestinfo, NULL);
                        } else if (info.events == max_events) {
-                               cnt++;
                                avail[info.disk.raid_disk] = 2;
+                               best[info.disk.raid_disk] = devnum;
                        } else if (info.events == max_events-1) {
-                               cnt1++;
-                               avail[info.disk.raid_disk] = 1;
+                               if (avail[info.disk.raid_disk] == 0) {
+                                       avail[info.disk.raid_disk] = 1;
+                                       best[info.disk.raid_disk] = devnum;
+                               }
                        } else if (info.events < max_events - 1)
                                ;
                        else if (info.events == max_events+1) {
                                int i;
-                               cnt1 = cnt;
-                               cnt = 1;
                                max_events = info.events;
-                               for (i=0; i<info.array.raid_disks; i++)
+                               for (i=0; i < raid_disks; i++)
                                        if (avail[i])
                                                avail[i]--;
                                avail[info.disk.raid_disk] = 2;
+                               best[info.disk.raid_disk] = devnum;
                                st->ss->getinfo_super(st, bestinfo, NULL);
                        } else { /* info.events much bigger */
-                               cnt = 1; cnt1 = 0;
                                memset(avail, 0, info.disk.raid_disk);
                                max_events = info.events;
                                avail[info.disk.raid_disk] = 2;
@@ -745,15 +784,44 @@ static int count_active(struct supertype *st, int mdfd, char **availp,
                }
                st->ss->free_super(st);
        }
-       return cnt + cnt1;
+       if (!avail)
+               return 0;
+       /* We need to reject any device that thinks the best device is
+        * failed or missing */
+       for (b = 0; b < raid_disks; b++)
+               if (avail[b] == 2)
+                       break;
+       cnt = 0;
+       for (i = 0 ; i < raid_disks ; i++) {
+               if (i != b && avail[i])
+                       if (devmap[raid_disks * best[i] + b] == 0) {
+                               /* This device thinks 'b' is failed -
+                                * don't use it */
+                               devnum = best[i];
+                               for (d=sra->devs ; devnum; d = d->next)
+                                       devnum--;
+                               d->disk.state |= (1 << MD_DISK_REMOVED);
+                               avail[i] = 0;
+                       }
+               if (avail[i])
+                       cnt++;
+       }
+       free(best);
+       free(devmap);
+       return cnt;
 }
 
 static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+                          struct map_ent *target, int bare,
                           struct supertype *st, int verbose)
 {
        /* This device doesn't have any md metadata
-        * If it is 'bare' and theh device policy allows 'spare' look for
-        * an array or container to attach it to.
+        * The device policy allows 'spare' and if !bare, it allows spare-same-slot.
+        * If 'st' is not set, then we only know that some metadata allows this,
+        * others possibly don't.
+        * So look for a container or array to attach the device to.
+        * Prefer 'target' if that is set and the array is found.
+        *
         * If st is set, then only arrays of that type are considered
         * Return 0 on success, or some exit code on failure, probably 1.
         */
@@ -832,6 +900,9 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                         */
                        if (!policy_action_allows(pol, st2->ss->name, act_spare))
                                goto next;
+                       if (!bare && !policy_action_allows(pol, st2->ss->name,
+                                                          act_spare_same_slot))
+                               goto next;
                } else
                        st2 = st;
                get_dev_size(dfd, NULL, &devsize);
@@ -850,6 +921,31 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
 
                        goto next;
                }
+               /* test against target.
+                * If 'target' is set and 'bare' is false, we only accept
+                * arrays/containers that match 'target'.
+                * If 'target' is set and 'bare' is true, we prefer the
+                * array which matches 'target'.
+                */
+               if (target) {
+                       if (strcmp(target->metadata, mp->metadata) == 0 &&
+                           memcmp(target->uuid, mp->uuid,
+                                  sizeof(target->uuid)) == 0) {
+                               /* This is our target!! */
+                               if (chosen)
+                                       sysfs_free(chosen);
+                               chosen = sra;
+                               sra = NULL;
+                               /* skip to end so we don't check any more */
+                               while (mp->next)
+                                       mp = mp->next;
+                               goto next;
+                       }
+                       /* not our target */
+                       if (!bare)
+                               goto next;
+               }
+
                /* all tests passed, OK to add to this array */
                if (!chosen) {
                        chosen = sra;
@@ -1082,6 +1178,7 @@ static int is_bare(int dfd)
  * Arrays are given priority over partitions.
  */
 static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+                    struct map_ent *target,
                     struct supertype *st, int verbose)
 {
        int i;
@@ -1089,38 +1186,55 @@ static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
        int arrays_ok = 0;
        int partitions_ok = 0;
        int dfd = *dfdp;
+       int bare;
 
-       /* Can only add a spare if device has at least one domains */
+       /* Can only add a spare if device has at least one domain */
        if (pol_find(pol, pol_domain) == NULL)
                return 1;
        /* And only if some action allows spares */
        if (!policy_action_allows(pol, st?st->ss->name:NULL, act_spare))
                return 1;
 
-       /* Now check if the device is bare - we don't add non-bare devices
-        * yet even if action=-spare
+       /* Now check if the device is bare.
+        * bare devices can always be added as a spare
+        * non-bare devices can only be added if spare-same-slot is permitted,
+        * and this device is replacing a previous device - in which case 'target'
+        * will be set.
         */
-
        if (!is_bare(dfd)) {
-               if (verbose > 1)
-                       fprintf(stderr, Name ": %s is not bare, so not considering as a spare\n",
-                               devname);
-               return 1;
-       }
+               /* Must have a target and allow same_slot */
+               /* Later - may allow force_spare without target */
+               if (!target ||
+                   !policy_action_allows(pol, st?st->ss->name:NULL,
+                                         act_spare_same_slot)) {
+                       if (verbose > 1)
+                               fprintf(stderr, Name ": %s is not bare, so not "
+                                       "considering as a spare\n",
+                                       devname);
+                       return 1;
+               }
+               bare = 0;
+       } else
+               bare = 1;
 
-       /* This device passes our test for 'is bare'.
-        * Let's see what policy allows for such things.
+       /* It might be OK to add this device to an array - need to see
+        * what arrays might be candidates.
         */
        if (st) {
                /* just try try 'array' or 'partition' based on this metadata */
                if (st->ss->add_to_super)
-                       return array_try_spare(devname, dfdp, pol,
+                       return array_try_spare(devname, dfdp, pol, target, bare,
                                               st, verbose);
                else
                        return partition_try_spare(devname, dfdp, pol,
                                                   st, verbose);
        }
-       /* Now see which metadata type support spare */
+       /* No metadata was specified or found so options are open.
+        * Check for whether any array metadata, or any partition metadata
+        * might allow adding the spare.  This check just helps to avoid
+        * a more costly scan of all arrays when we can be sure that will
+        * fail.
+        */
        for (i = 0; (!arrays_ok || !partitions_ok) && superlist[i] ; i++) {
                if (superlist[i]->add_to_super && !arrays_ok &&
                    policy_action_allows(pol, superlist[i]->name, act_spare))
@@ -1131,7 +1245,8 @@ static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
        }
        rv = 1;
        if (arrays_ok)
-               rv = array_try_spare(devname, dfdp, pol, st, verbose);
+               rv = array_try_spare(devname, dfdp, pol, target, bare,
+                                    st, verbose);
        if (rv != 0 && partitions_ok)
                rv = partition_try_spare(devname, dfdp, pol, st, verbose);
        return rv;