git.ipfire.org Git - thirdparty/mdadm.git/commitdiff
mdmon: handle failures versus readauto arrays
author Dan Williams <dan.j.williams@intel.com>
Wed, 30 Jul 2008 02:25:15 +0000 (19:25 -0700)
committer Dan Williams <dan.j.williams@intel.com>
Fri, 15 Aug 2008 17:58:43 +0000 (10:58 -0700)
Transition readauto arrays to active before failing drives.

Hmm... why do we keep reblocking / renotifying in the readonly case?
Need to bottom out on this, but not right now.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
mdadm.h
monitor.c

diff --git a/mdadm.h b/mdadm.h
index 12eef2a2be10ef72419407695610639f8bd819ad..80a6f92f86b6c39d9d2649adf43a1ecff1c983d7 100644 (file)
--- a/mdadm.h
+++ b/mdadm.h
@@ -171,6 +171,7 @@ struct mdinfo {
        #define DS_SPARE        8
        #define DS_BLOCKED      16
        #define DS_REMOVE       1024
+       #define DS_UNBLOCK      2048
        int prev_state, curr_state, next_state;
 
 };
diff --git a/monitor.c b/monitor.c
index 382cad44b76add76d50a78e8546096b1eec6928e..ffb4c9c48dc7e439cc1da352949f6c2aa6032f5b 100644 (file)
--- a/monitor.c
+++ b/monitor.c
@@ -284,12 +284,25 @@ static int read_and_act(struct active_array *a)
                }
        }
 
+       /* Check for failures and if found:
+        * 1/ Record the failure in the metadata and unblock the device.
+        *    FIXME update the kernel to stop notifying on failed drives when
+        *    the array is readonly and we have cleared 'blocked'
+        * 2/ Try to remove the device if the array is writable, or can be
+        *    made writable.
+        */
        for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
                if (mdi->curr_state & DS_FAULTY) {
                        a->container->ss->set_disk(a, mdi->disk.raid_disk,
                                                   mdi->curr_state);
                        check_degraded = 1;
-                       mdi->next_state = DS_REMOVE;
+                       mdi->next_state |= DS_UNBLOCK;
+                       if (a->curr_state == read_auto) {
+                               a->container->ss->set_array_state(a, 0);
+                               a->next_state = active;
+                       }
+                       if (a->curr_state > readonly)
+                               mdi->next_state |= DS_REMOVE;
                }
        }
 
@@ -306,15 +319,18 @@ static int read_and_act(struct active_array *a)
                dprintf(" action:%s", array_states[a->next_state]);
        }
        for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
-               if (mdi->next_state == DS_REMOVE && mdi->state_fd >= 0) {
+               if (mdi->next_state & DS_UNBLOCK) {
+                       dprintf(" %d:-blocked", mdi->disk.raid_disk);
+                       write_attr("-blocked", mdi->state_fd);
+               }
+
+               if ((mdi->next_state & DS_REMOVE) && mdi->state_fd >= 0) {
                        int remove_result;
 
-                       write_attr("-blocked", mdi->state_fd);
                        /* the kernel may not be able to immediately remove the
                         * disk, we can simply wait until the next event to try
                         * again.
                         */
-                       dprintf(" %d:-blocked", mdi->disk.raid_disk);
                        remove_result = write_attr("remove", mdi->state_fd);
                        if (remove_result > 0) {
                                dprintf(" %d:removed", mdi->disk.raid_disk);