]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Incremental.c
Add mbr pseudo metadata handler.
[thirdparty/mdadm.git] / Incremental.c
index d6dd0f43bef1e84786cb7dfb9fb75607edff3856..bcbd78d0b4abe67ec2069922936be39447a609c5 100644 (file)
@@ -35,6 +35,8 @@ static int count_active(struct supertype *st, int mdfd, char **availp,
 static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
                        int number, __u64 events, int verbose,
                        char *array_name);
+static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+                    struct supertype *st, int verbose);
 
 int Incremental(char *devname, int verbose, int runstop,
                struct supertype *st, char *homehost, int require_homehost,
@@ -78,17 +80,18 @@ int Incremental(char *devname, int verbose, int runstop,
         *   start the array (auto-readonly).
         */
        struct stat stb;
-       struct mdinfo info;
+       struct mdinfo info, dinfo;
        struct mddev_ident_s *array_list, *match;
        char chosen_name[1024];
-       int rv;
+       int rv = 1;
        struct map_ent *mp, *map = NULL;
-       int dfd, mdfd;
+       int dfd = -1, mdfd = -1;
        char *avail;
        int active_disks;
        int trustworthy = FOREIGN;
        char *name_to_use;
        mdu_array_info_t ainf;
+       struct dev_policy *policy = NULL;
 
        struct createinfo *ci = conf_get_create_info();
 
@@ -100,7 +103,7 @@ int Incremental(char *devname, int verbose, int runstop,
                        fprintf(stderr, Name
                                ": %s not permitted by mdadm.conf.\n",
                                devname);
-               return 1;
+               goto out;
        }
 
        /* 2/ Find metadata, reject if none appropriate (check
@@ -111,39 +114,44 @@ int Incremental(char *devname, int verbose, int runstop,
                if (verbose >= 0)
                        fprintf(stderr, Name ": cannot open %s: %s.\n",
                                devname, strerror(errno));
-               return 1;
+               goto out;
        }
        if (fstat(dfd, &stb) < 0) {
                if (verbose >= 0)
                        fprintf(stderr, Name ": fstat failed for %s: %s.\n",
                                devname, strerror(errno));
-               close(dfd);
-               return 1;
+               goto out;
        }
        if ((stb.st_mode & S_IFMT) != S_IFBLK) {
                if (verbose >= 0)
                        fprintf(stderr, Name ": %s is not a block device.\n",
                                devname);
-               close(dfd);
-               return 1;
+               goto out;
        }
 
+       dinfo.disk.major = major(stb.st_rdev);
+       dinfo.disk.minor = minor(stb.st_rdev);
+
+       policy = disk_policy(&dinfo);
+
        if (st == NULL && (st = guess_super(dfd)) == NULL) {
                if (verbose >= 0)
                        fprintf(stderr, Name
                                ": no recognisable superblock on %s.\n",
                                devname);
-               close(dfd);
-               return 1;
+               rv = try_spare(devname, &dfd, policy, st, verbose);
+               goto out;
        }
-       if (st->ss->load_super(st, dfd, NULL)) {
+       if (st->ss->compare_super == NULL ||
+           st->ss->load_super(st, dfd, NULL)) {
                if (verbose >= 0)
                        fprintf(stderr, Name ": no RAID superblock on %s.\n",
                                devname);
-               close(dfd);
-               return 1;
+               rv = try_spare(devname, &dfd, policy, st, verbose);
+               free(st);
+               goto out;
        }
-       close (dfd);
+       close (dfd); dfd = -1;
 
        memset(&info, 0, sizeof(info));
        st->ss->getinfo_super(st, &info);
@@ -207,7 +215,8 @@ int Incremental(char *devname, int verbose, int runstop,
                                        fprintf(stderr, Name
                                                ": multiple lines in mdadm.conf match\n");
                        }
-                       return 2;
+                       rv = 2;
+                       goto out;
                }
                match = array_list;
        }
@@ -218,7 +227,7 @@ int Incremental(char *devname, int verbose, int runstop,
                        fprintf(stderr, Name ": array containing %s is explicitly"
                                " ignored by mdadm.conf\n",
                                devname);
-               return 1;
+               goto out;
        }
 
        /* 3a/ if not, check for homehost match.  If no match, continue
@@ -242,7 +251,7 @@ int Incremental(char *devname, int verbose, int runstop,
                                ": %s has metadata type %s for which "
                                "auto-assembly is disabled\n",
                                devname, st->ss->name);
-               return 1;
+               goto out;
        }
        if (trustworthy == LOCAL_ANY)
                trustworthy = LOCAL;
@@ -264,14 +273,16 @@ int Incremental(char *devname, int verbose, int runstop,
                else {
                        if (verbose)
                                fprintf(stderr, Name ": not enough devices to start the container\n");
-                       return 1;
+                       rv = 0;
+                       goto out;
                }
 
                /* This is a pre-built container array, so we do something
                 * rather different.
                 */
-               return Incremental_container(st, devname, verbose, runstop,
+               rv = Incremental_container(st, devname, verbose, runstop,
                                             autof, trustworthy);
+               goto out;
        }
 
        name_to_use = info.name;
@@ -294,7 +305,9 @@ int Incremental(char *devname, int verbose, int runstop,
 
        /* 4/ Check if array exists.
         */
-       map_lock(&map);
+       if (map_lock(&map))
+               fprintf(stderr, Name ": failed to get exclusive lock on "
+                       "mapfile\n");
        mp = map_by_uuid(&map, info.uuid);
        if (mp)
                mdfd = open_dev(mp->devnum);
@@ -303,22 +316,21 @@ int Incremental(char *devname, int verbose, int runstop,
 
        if (mdfd < 0) {
                struct mdinfo *sra;
-               struct mdinfo dinfo;
 
                /* Couldn't find an existing array, maybe make a new one */
                mdfd = create_mddev(match ? match->devname : NULL,
                                    name_to_use, autof, trustworthy, chosen_name);
 
                if (mdfd < 0)
-                       return 1;
+                       goto out;
 
                sysfs_init(&info, mdfd, 0);
 
                if (set_array_info(mdfd, st, &info) != 0) {
                        fprintf(stderr, Name ": failed to set array info for %s: %s\n",
                                chosen_name, strerror(errno));
-                       close(mdfd);
-                       return 2;
+                       rv = 2;
+                       goto out;
                }
 
                dinfo = info;
@@ -328,8 +340,8 @@ int Incremental(char *devname, int verbose, int runstop,
                        fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
                                devname, chosen_name, strerror(errno));
                        ioctl(mdfd, STOP_ARRAY, 0);
-                       close(mdfd);
-                       return 2;
+                       rv = 2;
+                       goto out;
                }
                sra = sysfs_read(mdfd, fd2devnum(mdfd), GET_DEVS);
                if (!sra || !sra->devs || sra->devs->disk.raid_disk >= 0) {
@@ -341,9 +353,9 @@ int Incremental(char *devname, int verbose, int runstop,
                        fprintf(stderr, Name
                      ": You have an old buggy kernel which cannot support\n"
                                "      --incremental reliably.  Aborting.\n");
-                       close(mdfd);
                        sysfs_free(sra);
-                       return 2;
+                       rv = 2;
+                       goto out;
                }
                info.array.working_disks = 1;
                sysfs_free(sra);
@@ -368,8 +380,40 @@ int Incremental(char *devname, int verbose, int runstop,
                else
                        strcpy(chosen_name, devnum2devname(mp->devnum));
 
-               sra = sysfs_read(mdfd, fd2devnum(mdfd), (GET_DEVS | GET_STATE));
+               /* It is generally not OK to add non-spare drives to a
+                * running array as they are probably missing because
+                * they failed.  However if runstop is 1, then the
+                * array was possibly started early and our best bet is
+                * to add this anyway.
+                * Also if action policy is re-add or better we allow
+                * re-add
+                */
+               if ((info.disk.state & (1<<MD_DISK_SYNC)) != 0
+                   && ! policy_action_allows(policy, st->ss->name,
+                                             act_re_add)
+                   && runstop < 1) {
+                       int active = 0;
+                       
+                       if (st->ss->external) {
+                               char *devname = devnum2devname(fd2devnum(mdfd));
 
+                               active = devname && is_container_active(devname);
+                               free(devname);
+                       } else if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0)
+                               active = 1;
+                       if (active) {
+                               fprintf(stderr, Name
+                                       ": not adding %s to active array (without --run) %s\n",
+                                       devname, chosen_name);
+                               rv = 2;
+                               goto out;
+                       }
+               }
+               sra = sysfs_read(mdfd, fd2devnum(mdfd), (GET_DEVS | GET_STATE));
+               if (!sra) {
+                       rv = 2;
+                       goto out;
+               }
                if (sra->devs) {
                        sprintf(dn, "%d:%d", sra->devs->disk.major,
                                sra->devs->disk.minor);
@@ -381,9 +425,9 @@ int Incremental(char *devname, int verbose, int runstop,
                                        ": metadata mismatch between %s and "
                                        "chosen array %s\n",
                                        devname, chosen_name);
-                               close(mdfd);
                                close(dfd2);
-                               return 2;
+                               rv = 2;
+                               goto out;
                        }
                        close(dfd2);
                        memset(&info2, 0, sizeof(info2));
@@ -395,8 +439,8 @@ int Incremental(char *devname, int verbose, int runstop,
                                fprintf(stderr, Name
                                        ": unexpected difference between %s and %s.\n",
                                        chosen_name, devname);
-                               close(mdfd);
-                               return 2;
+                               rv = 2;
+                               goto out;
                        }
                }
                info2.disk.major = major(stb.st_rdev);
@@ -416,8 +460,8 @@ int Incremental(char *devname, int verbose, int runstop,
                if (err < 0) {
                        fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
                                devname, chosen_name, strerror(errno));
-                       close(mdfd);
-                       return 2;
+                       rv = 2;
+                       goto out;
                }
                info.array.working_disks = 0;
                for (d = sra->devs; d; d=d->next)
@@ -457,8 +501,8 @@ int Incremental(char *devname, int verbose, int runstop,
                             ": %s attached to %s, not enough to start (%d).\n",
                                devname, chosen_name, active_disks);
                map_unlock(&map);
-               close(mdfd);
-               return 0;
+               rv = 0;
+               goto out;
        }
        free(avail);
 
@@ -473,14 +517,14 @@ int Incremental(char *devname, int verbose, int runstop,
                        fprintf(stderr, Name
                           ": %s attached to %s which is already active.\n",
                                devname, chosen_name);
-               close(mdfd);
                map_unlock(&map);
-               return 0;
+               rv = 0;
+               goto out;
        }
 
        map_unlock(&map);
        if (runstop > 0 || active_disks >= info.array.working_disks) {
-               struct mdinfo *sra;
+               struct mdinfo *sra, *dsk;
                /* Let's try to start it */
                if (match && match->bitmap_file) {
                        int bmfd = open(match->bitmap_file, O_RDWR);
@@ -488,20 +532,20 @@ int Incremental(char *devname, int verbose, int runstop,
                                fprintf(stderr, Name
                                        ": Could not open bitmap file %s.\n",
                                        match->bitmap_file);
-                               close(mdfd);
-                               return 1;
+                               goto out;
                        }
                        if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) {
                                close(bmfd);
                                fprintf(stderr, Name
                                        ": Failed to set bitmapfile for %s.\n",
                                        chosen_name);
-                               close(mdfd);
-                               return 1;
+                               goto out;
                        }
                        close(bmfd);
                }
-               sra = sysfs_read(mdfd, fd2devnum(mdfd), 0);
+               /* GET_* needed so add_disk works below */
+               sra = sysfs_read(mdfd, fd2devnum(mdfd),
+                                GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE);
                if ((sra == NULL || active_disks >= info.array.working_disks)
                    && trustworthy != FOREIGN)
                        rv = ioctl(mdfd, RUN_ARRAY, NULL);
@@ -511,10 +555,23 @@ int Incremental(char *devname, int verbose, int runstop,
                if (rv == 0) {
                        if (verbose >= 0)
                                fprintf(stderr, Name
-                          ": %s attached to %s, which has been started.\n",
+                                       ": %s attached to %s, which has been started.\n",
                                        devname, chosen_name);
                        rv = 0;
                        wait_for(chosen_name, mdfd);
+                       /* We just started the array, so some devices
+                        * might have been evicted from the array
+                        * because their event counts were too old.
+                        * If the action=re-add policy is in-force for
+                        * those devices we should re-add them now.
+                        */
+                       for (dsk = sra->devs; dsk ; dsk = dsk->next) {
+                               if (disk_action_allows(dsk, st->ss->name, act_re_add) &&
+                                   add_disk(mdfd, st, sra, dsk) == 0)
+                                       fprintf(stderr, Name
+                                               ": %s re-added to %s\n",
+                                               dsk->sys_name, chosen_name);
+                       }
                } else {
                        fprintf(stderr, Name
                              ": %s attached to %s, but failed to start: %s.\n",
@@ -528,7 +585,13 @@ int Incremental(char *devname, int verbose, int runstop,
                                devname, chosen_name);
                rv = 0;
        }
-       close(mdfd);
+out:
+       if (dfd >= 0)
+               close(dfd);
+       if (mdfd >= 0)
+               close(mdfd);
+       if (policy)
+               dev_policy_free(policy);
        return rv;
 }
 
@@ -586,6 +649,9 @@ static int count_active(struct supertype *st, int mdfd, char **availp,
        struct mdinfo *sra = sysfs_read(mdfd, -1, GET_DEVS | GET_STATE);
        char *avail = NULL;
 
+       if (!sra)
+               return 0;
+
        for (d = sra->devs ; d ; d = d->next) {
                char dn[30];
                int dfd;
@@ -648,6 +714,178 @@ static int count_active(struct supertype *st, int mdfd, char **availp,
        return cnt + cnt1;
 }
 
+/*
+ * try_spare - try to attach a device that has no md metadata to an
+ * existing array or container as a spare.
+ *
+ * @devname - name of the device, used only in messages
+ * @dfdp    - points at an open file descriptor for the device; the
+ *            descriptor is closed and *dfdp set to -1 just before the
+ *            device is passed to Manage_subdevs()
+ * @pol     - policy of the device; nothing is done unless it allows
+ *            the 'spare' action
+ * @st      - if set, only arrays of that metadata type are considered
+ * @verbose - verbosity level for diagnostics
+ *
+ * Return 0 if the device was added as a spare, 1 if it was rejected or
+ * no usable array was found, otherwise the non-zero result of
+ * Manage_subdevs().
+ */
+static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+                    struct supertype *st, int verbose)
+{
+       /* Pessimistic default: only a successful Manage_subdevs() call
+        * below turns this into 0.
+        */
+       int rv = 1;
+       char bufpad[4096 + 4096];
+       /* buf is a 4096-byte aligned 4K window inside bufpad */
+       char *buf = (char*)(((long)bufpad + 4096) & ~4095);
+       struct stat stb;
+       struct map_ent *mp, *map = NULL;
+       struct mdinfo *chosen = NULL;
+       /* Stays 0 (so the device is treated as too small) if
+        * get_dev_size() fails without storing a size.
+        */
+       unsigned long long devsize = 0;
+       int dfd = *dfdp;
+
+       /* First check policy */
+       if (!policy_action_allows(pol, st?st->ss->name:NULL, act_spare))
+               return 1;
+
+       if (fstat(dfd, &stb) != 0)
+               return 1;
+
+       /* Now check if the device is bare - we don't add non-bare devices
+        * yet even if action=spare.
+        * 'Bare' means that the first 4K and the last 4K of the device
+        * are each filled with one repeated byte: 0x00, 0x5a or 0xff.
+        */
+       if (lseek(dfd, 0, SEEK_SET) != 0 ||
+           read(dfd, buf, 4096) != 4096) {
+       not_bare:
+               if (verbose > 1)
+                       fprintf(stderr, Name ": %s is not bare, so not considering as a spare\n",
+                               devname);
+               return 1;
+       }
+       if (buf[0] != '\0' && buf[0] != '\x5a' && buf[0] != '\xff')
+               goto not_bare;
+       /* every byte equals its neighbour, so all equal buf[0] */
+       if (memcmp(buf, buf+1, 4095) != 0)
+               goto not_bare;
+
+       /* OK, first 4K appear blank, try the end. */
+       if (lseek(dfd, -4096, SEEK_END) < 0 ||
+           read(dfd, buf, 4096) != 4096)
+               goto not_bare;
+
+       if (buf[0] != '\0' && buf[0] != '\x5a' && buf[0] != '\xff')
+               goto not_bare;
+       if (memcmp(buf, buf+1, 4095) != 0)
+               goto not_bare;
+
+       /* This device passes our test for 'is bare'.
+        * Now we need to find a suitable array to add this to.
+        * We only accept arrays that:
+        *  - match 'st'
+        *  - are in the same domains as the device
+        *  - are of a size for which the device will be useful
+        * and we choose the one that is the most degraded
+        */
+
+       if (map_lock(&map)) {
+               fprintf(stderr, Name ": failed to get exclusive lock on "
+                       "mapfile\n");
+               return 1;
+       }
+
+       /* The size of the candidate does not depend on the array being
+        * examined, so look it up once.
+        */
+       get_dev_size(dfd, NULL, &devsize);
+
+       for (mp = map ; mp ; mp = mp->next) {
+               struct supertype *st2 = NULL;
+               struct domainlist *dl = NULL;
+               struct mdinfo *sra;
+
+               if (is_subarray(mp->metadata))
+                       continue;
+               if (st) {
+                       st2 = st->ss->match_metadata_desc(mp->metadata);
+                       if (!st2 ||
+                           (st->minor_version >= 0 &&
+                            st->minor_version != st2->minor_version)) {
+                               if (verbose > 1)
+                                       fprintf(stderr, Name ": not adding %s to %s as metadata type doesn't match\n",
+                                               devname, mp->path);
+                               free(st2);
+                               continue;
+                       }
+                       free(st2);
+               }
+               sra = sysfs_read(-1, mp->devnum,
+                                GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
+                                GET_DEGRADED|GET_COMPONENT|GET_VERSION);
+               if (!sra) {
+                       /* Probably a container - no degraded info */
+                       sra = sysfs_read(-1, mp->devnum,
+                                        GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
+                                        GET_COMPONENT|GET_VERSION);
+                       if (sra)
+                               sra->array.failed_disks = 0;
+               }
+               if (!sra)
+                       continue;
+               if (st == NULL) {
+                       int i;
+                       st2 = NULL;
+                       for(i=0; !st2 && superlist[i]; i++)
+                               st2 = superlist[i]->match_metadata_desc(
+                                       sra->text_version);
+                       /* No handler recognises this array's metadata,
+                        * so there is nothing to size the device
+                        * against: skip the array.
+                        */
+                       if (!st2)
+                               goto next;
+               } else
+                       st2 = st;
+               if (st2->ss->avail_size(st2, devsize) < sra->component_size) {
+                       if (verbose > 1)
+                               fprintf(stderr, Name ": not adding %s to %s as it is too small\n",
+                                       devname, mp->path);
+                       goto next;
+               }
+               dl = domain_from_array(sra, st2->ss->name);
+               if (!domain_test(dl, pol, st2->ss->name)) {
+                       /* domain test fails */
+                       if (verbose > 1)
+                               fprintf(stderr, Name ": not adding %s to %s as it is not in a compatible domain\n",
+                                       devname, mp->path);
+
+                       goto next;
+               }
+               /* all tests passed, OK to add to this array;
+                * keep whichever candidate has more failed disks
+                */
+               if (!chosen) {
+                       chosen = sra;
+                       sra = NULL;
+               } else if (chosen->array.failed_disks < sra->array.failed_disks) {
+                       sysfs_free(chosen);
+                       chosen = sra;
+                       sra = NULL;
+               }
+       next:
+               if (sra)
+                       sysfs_free(sra);
+               /* st2 is only ours to free when it was looked up here */
+               if (st != st2)
+                       free(st2);
+               if (dl)
+                       domain_free(dl);
+       }
+       if (chosen) {
+               /* add current device to chosen array as a spare */
+               int mdfd = open_dev(devname2devnum(chosen->sys_name));
+               if (mdfd >= 0) {
+                       struct mddev_dev_s devlist;
+                       /* the device is named as "major:minor" */
+                       char devnum_str[20];
+
+                       memset(&devlist, 0, sizeof(devlist));
+                       snprintf(devnum_str, sizeof(devnum_str), "%d:%d",
+                                major(stb.st_rdev), minor(stb.st_rdev));
+                       devlist.devname = devnum_str;
+                       devlist.disposition = 'a';
+                       /* our descriptor is closed before the add; the
+                        * caller sees -1 and will not close it again
+                        */
+                       close(dfd);
+                       *dfdp = -1;
+                       rv =  Manage_subdevs(chosen->sys_name, mdfd, &devlist,
+                                            -1, 0);
+                       close(mdfd);
+               }
+               if (verbose > 0) {
+                       if (rv == 0)
+                               fprintf(stderr, Name ": added %s as spare for %s\n",
+                                       devname, chosen->sys_name);
+                       else
+                               fprintf(stderr, Name ": failed to add %s as spare for %s\n",
+                                       devname, chosen->sys_name);
+               }
+               sysfs_free(chosen);
+       }
+       map_unlock(&map);
+       return rv;
+}
+
 int IncrementalScan(int verbose)
 {
        /* look at every device listed in the 'map' file.
@@ -763,7 +1001,9 @@ int Incremental_container(struct supertype *st, char *devname, int verbose,
        struct mdinfo *ra;
        struct map_ent *map = NULL;
 
-       map_lock(&map);
+       if (map_lock(&map))
+               fprintf(stderr, Name ": failed to get exclusive lock on "
+                       "mapfile\n");
 
        for (ra = list ; ra ; ra = ra->next) {
                int mdfd;
@@ -849,3 +1089,45 @@ int Incremental_container(struct supertype *st, char *devname, int verbose,
        map_unlock(&map);
        return 0;
 }
+
+/*
+ * IncrementalRemove - find the array that the given device is a
+ * component of, mark the device as failed in it, and then remove the
+ * device from it.
+ *
+ * @devname - the device to remove.  This must be a kernel name such as
+ *            "sda" (anything containing '/' is rejected) because it is
+ *            looked up with mdstat_by_component()
+ * @verbose - verbosity level passed on to Manage_subdevs()
+ *
+ * Returns 1 if devname contains '/', is not a component of any array,
+ * or the array cannot be opened.  Otherwise returns the result of the
+ * 'remove' request; the result of the 'fail' request is not checked.
+ */
+int IncrementalRemove(char *devname, int verbose)
+{
+       struct mddev_dev_s request = {
+               .devname = devname,
+               .disposition = 'f',
+       };
+       struct mdstat_ent *owner;
+       int array_fd;
+       int status;
+
+       if (strchr(devname, '/') != NULL) {
+               fprintf(stderr, Name ": incremental removal requires a "
+                       "kernel device name, not a file: %s\n", devname);
+               return 1;
+       }
+
+       owner = mdstat_by_component(devname);
+       if (owner == NULL) {
+               fprintf(stderr, Name ": %s does not appear to be a component "
+                       "of any array\n", devname);
+               return 1;
+       }
+
+       array_fd = open_dev(owner->devnum);
+       if (array_fd < 0) {
+               fprintf(stderr, Name ": Cannot open array %s!!\n", owner->dev);
+               return 1;
+       }
+
+       /* 'f' (fail) first, then reuse the same request for 'r' (remove) */
+       Manage_subdevs(owner->dev, array_fd, &request, verbose, 0);
+
+       request.disposition = 'r';
+       status = Manage_subdevs(owner->dev, array_fd, &request, verbose, 0);
+
+       close(array_fd);
+       return status;
+}