git.ipfire.org Git - thirdparty/mdadm.git/commitdiff
Use action policy to keep recently-disconnected devices in the array.
author: NeilBrown <neilb@suse.de>
Mon, 23 Aug 2010 05:54:13 +0000 (15:54 +1000)
committer: NeilBrown <neilb@suse.de>
Mon, 6 Sep 2010 01:26:27 +0000 (11:26 +1000)
When we find a device that was recently part of the array but is now
out of date (based on the event count) we might want to add it back in
(like --re-add) if the likely cause was a connection problem or we
might not if the likely cause was device failure.

So make this a policy issue: if action=re-add or better, try to re-add
any device that looks like it might be part of the array.

This applies:
  when we assemble the array:  old devices will be evicted by the
     kernel and need to be re-added.
  when we assemble the array during --incr for the same reason.
  when we find a device that could be added to a running array.

This doesn't affect arrays with external metadata at all.
For such arrays:
 When the container is assembled, the most recent instance of each
 device is included without reference to whether it is too old or not.
 Then the metadata handler must decide which slices of which devices to
 include in which array and with what state.  So the
 ->container_content method should probably check the policy and
 compare the sequence numbers/event counts.
 When a device is added (--add) to a container with active arrays,
 we only add it as a 'spare'. --re-add doesn't seem to be an option.
 When a device is added with -I, ->container_content gets another
 chance to assess things.  So again it should check the policy.

Signed-off-by: NeilBrown <neilb@suse.de>
Assemble.c
Incremental.c

index afd4e60a283f7cfb641463e200775bda04b701b8..42f71fd47126940b63ae94381dca17f38e90f6c6 100644 (file)
@@ -1189,6 +1189,29 @@ int Assemble(struct supertype *st, char *mddev,
                                                                      (4 * content->array.chunk_size / 4096) + 1);
                                        }
                                }
+                               if (okcnt < (unsigned)content->array.raid_disks) {
+                                       /* If any devices did not get added
+                                        * because the kernel rejected them based
+                                        * on event count, try adding them
+                                        * again providing the action policy is
+                                        * 're-add' or greater.  The bitmap
+                                        * might allow them to be included, or
+                                        * they will become spares.
+                                        */
+                                       for (i = 0; i <= bestcnt; i++) {
+                                               int j = best[i];
+                                               if (j >= 0 && !devices[j].uptodate) {
+                                                       if (!disk_action_allows(&devices[j].i, st->ss->name, act_re_add))
+                                                               continue;
+                                                       rv = add_disk(mdfd, st, content,
+                                                                     &devices[j].i);
+                                                       if (rv == 0 && verbose >= 0)
+                                                               fprintf(stderr,
+                                                                       Name ": %s has been re-added.\n",
+                                                                       devices[j].devname);
+                                               }
+                                       }
+                               }
                                wait_for(mddev, mdfd);
                                close(mdfd);
                                if (auto_assem) {
index 4b9959400cb32c17b0faf4d7bc6e2b1af737d3ea..cfa7de94044762d6ba15ee3ffa46d5663fdba3b3 100644 (file)
@@ -78,7 +78,7 @@ int Incremental(char *devname, int verbose, int runstop,
         *   start the array (auto-readonly).
         */
        struct stat stb;
-       struct mdinfo info;
+       struct mdinfo info, dinfo;
        struct mddev_ident_s *array_list, *match;
        char chosen_name[1024];
        int rv = 1;
@@ -89,6 +89,7 @@ int Incremental(char *devname, int verbose, int runstop,
        int trustworthy = FOREIGN;
        char *name_to_use;
        mdu_array_info_t ainf;
+       struct dev_policy *policy = NULL;
 
        struct createinfo *ci = conf_get_create_info();
 
@@ -126,6 +127,11 @@ int Incremental(char *devname, int verbose, int runstop,
                goto out;
        }
 
+       dinfo.disk.major = major(stb.st_rdev);
+       dinfo.disk.minor = minor(stb.st_rdev);
+
+       policy = disk_policy(&dinfo);
+
        if (st == NULL && (st = guess_super(dfd)) == NULL) {
                if (verbose >= 0)
                        fprintf(stderr, Name
@@ -304,7 +310,6 @@ int Incremental(char *devname, int verbose, int runstop,
 
        if (mdfd < 0) {
                struct mdinfo *sra;
-               struct mdinfo dinfo;
 
                /* Couldn't find an existing array, maybe make a new one */
                mdfd = create_mddev(match ? match->devname : NULL,
@@ -372,11 +377,14 @@ int Incremental(char *devname, int verbose, int runstop,
                /* It is generally not OK to add non-spare drives to a
                 * running array as they are probably missing because
                 * they failed.  However if runstop is 1, then the
-                * array was possibly started early and our best be is
-                * to add this anyway.  It would probably be good to
-                * allow explicit policy statement about this.
+                * array was possibly started early and our best bet is
+                * to add this anyway.
+                * Also if action policy is re-add or better we allow
+                * re-add
                 */
                if ((info.disk.state & (1<<MD_DISK_SYNC)) != 0
+                   && ! policy_action_allows(policy, st->ss->name,
+                                             act_re_add)
                    && runstop < 1) {
                        int active = 0;
                        
@@ -510,7 +518,7 @@ int Incremental(char *devname, int verbose, int runstop,
 
        map_unlock(&map);
        if (runstop > 0 || active_disks >= info.array.working_disks) {
-               struct mdinfo *sra;
+               struct mdinfo *sra, *dsk;
                /* Let's try to start it */
                if (match && match->bitmap_file) {
                        int bmfd = open(match->bitmap_file, O_RDWR);
@@ -529,7 +537,9 @@ int Incremental(char *devname, int verbose, int runstop,
                        }
                        close(bmfd);
                }
-               sra = sysfs_read(mdfd, fd2devnum(mdfd), 0);
+               /* GET_* needed so add_disk works below */
+               sra = sysfs_read(mdfd, fd2devnum(mdfd),
+                                GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE);
                if ((sra == NULL || active_disks >= info.array.working_disks)
                    && trustworthy != FOREIGN)
                        rv = ioctl(mdfd, RUN_ARRAY, NULL);
@@ -539,10 +549,23 @@ int Incremental(char *devname, int verbose, int runstop,
                if (rv == 0) {
                        if (verbose >= 0)
                                fprintf(stderr, Name
-                          ": %s attached to %s, which has been started.\n",
+                                       ": %s attached to %s, which has been started.\n",
                                        devname, chosen_name);
                        rv = 0;
                        wait_for(chosen_name, mdfd);
+                       /* We just started the array, so some devices
+                        * might have been evicted from the array
+                        * because their event counts were too old.
+                        * If the action=re-add policy is in-force for
+                        * those devices we should re-add them now.
+                        */
+                       for (dsk = sra->devs; dsk ; dsk = dsk->next) {
+                               if (disk_action_allows(dsk, st->ss->name, act_re_add) &&
+                                   add_disk(mdfd, st, sra, dsk) == 0)
+                                       fprintf(stderr, Name
+                                               ": %s re-added to %s\n",
+                                               dsk->sys_name, chosen_name);
+                       }
                } else {
                        fprintf(stderr, Name
                              ": %s attached to %s, but failed to start: %s.\n",
@@ -561,6 +584,8 @@ out:
                close(dfd);
        if (mdfd >= 0)
                close(mdfd);
+       if (policy)
+               dev_policy_free(policy);
        return rv;
 }