mdmon: handle failures versus readauto arrays

author Dan Williams <dan.j.williams@intel.com>

Wed, 30 Jul 2008 02:25:15 +0000 (19:25 -0700)

committer Dan Williams <dan.j.williams@intel.com>

Fri, 15 Aug 2008 17:58:43 +0000 (10:58 -0700)
author Dan Williams <dan.j.williams@intel.com>
Wed, 30 Jul 2008 02:25:15 +0000 (19:25 -0700)
committer Dan Williams <dan.j.williams@intel.com>
Fri, 15 Aug 2008 17:58:43 +0000 (10:58 -0700)
diff --git a/mdadm.h b/mdadm.h

index 12eef2a2be10ef72419407695610639f8bd819ad..80a6f92f86b6c39d9d2649adf43a1ecff1c983d7 100644 (file)
--- a/mdadm.h
+++ b/mdadm.h
@@ -171,6 +171,7 @@ struct mdinfo {
         #define DS_SPARE        8
         #define DS_BLOCKED      16
         #define DS_REMOVE       1024
+       #define DS_UNBLOCK      2048
         int prev_state, curr_state, next_state;
  
  };
diff --git a/monitor.c b/monitor.c

index 382cad44b76add76d50a78e8546096b1eec6928e..ffb4c9c48dc7e439cc1da352949f6c2aa6032f5b 100644 (file)
--- a/monitor.c
+++ b/monitor.c
@@ -284,12 +284,25 @@ static int read_and_act(struct active_array *a)
                 }
         }
  
+       /* Check for failures and if found:
+        * 1/ Record the failure in the metadata and unblock the device.
+        *    FIXME update the kernel to stop notifying on failed drives when
+        *    the array is readonly and we have cleared 'blocked'
+        * 2/ Try to remove the device if the array is writable, or can be
+        *    made writable.
+        */
         for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
                 if (mdi->curr_state & DS_FAULTY) {
                         a->container->ss->set_disk(a, mdi->disk.raid_disk,
                                                    mdi->curr_state);
                         check_degraded = 1;
-                       mdi->next_state = DS_REMOVE;
+                       mdi->next_state |= DS_UNBLOCK;
+                       if (a->curr_state == read_auto) {
+                               a->container->ss->set_array_state(a, 0);
+                               a->next_state = active;
+                       }
+                       if (a->curr_state > readonly)
+                               mdi->next_state |= DS_REMOVE;
                 }
         }
  
@@ -306,15 +319,18 @@ static int read_and_act(struct active_array *a)
                 dprintf(" action:%s", array_states[a->next_state]);
         }
         for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
-               if (mdi->next_state == DS_REMOVE && mdi->state_fd >= 0) {
+               if (mdi->next_state & DS_UNBLOCK) {
+                       dprintf(" %d:-blocked", mdi->disk.raid_disk);
+                       write_attr("-blocked", mdi->state_fd);
+               }
+
+               if ((mdi->next_state & DS_REMOVE) && mdi->state_fd >= 0) {
                         int remove_result;
  
-                       write_attr("-blocked", mdi->state_fd);
                         /* the kernel may not be able to immediately remove the
                          * disk, we can simply wait until the next event to try
                          * again.
                          */
-                       dprintf(" %d:-blocked", mdi->disk.raid_disk);
                         remove_result = write_attr("remove", mdi->state_fd);
                         if (remove_result > 0) {
                                 dprintf(" %d:removed", mdi->disk.raid_disk);
author	Dan Williams <dan.j.williams@intel.com>
	Wed, 30 Jul 2008 02:25:15 +0000 (19:25 -0700)
committer	Dan Williams <dan.j.williams@intel.com>
	Fri, 15 Aug 2008 17:58:43 +0000 (10:58 -0700)
mdadm.h		patch | blob | blame | history
monitor.c		patch | blob | blame | history