git.ipfire.org Git - thirdparty/mdadm.git/commitdiff
handle disk failures
author Dan Williams <dan.j.williams@intel.com>
Thu, 15 May 2008 06:48:49 +0000 (16:48 +1000)
committer Neil Brown <neilb@suse.de>
Thu, 15 May 2008 06:48:49 +0000 (16:48 +1000)
From: Dan Williams <dan.j.williams@intel.com>

Added curr_state as a parameter to set_disk.  Handlers look at this to
record component failures, and set the global 'degraded' or 'failed'
status.

When reading the state as faulty:
1/ mark the disk failed in the metadata

2/ write '-blocked' to the rdev state to allow the kernel's failure
   mechanism to advance

3/ the kernel will take away the drive's role in remove_and_add_spares()

4/ once the disk no longer has a role, writing 'remove' to the rdev state
   will get the disk out of the array.

There is a window after writing '-blocked' where the kernel will return
-EBUSY to remove requests.  We rely on the fact that the disk will
continue to show faulty, so we lazily wait until the kernel is ready to
remove the disk.  If the manager thread needs to get the disk out of the
way, it can ping the monitor and wait, just like the replace_array()
case.

[buglet fix: swap the parameters of attr_match in read_dev_state]

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
mdadm.h
monitor.c
super-ddf.c
super-intel.c

diff --git a/mdadm.h b/mdadm.h
index f3b4ec27cc98c64a48e37a4eca529a84cdc3b3aa..29cfbf2ba2176a357e54d509d8941ed0b87483a4 100644 (file)
--- a/mdadm.h
+++ b/mdadm.h
@@ -162,6 +162,12 @@ struct mdinfo {
 
        /* Device info for mdmon: */
        int state_fd;
+       #define DS_FAULTY       1
+       #define DS_INSYNC       2
+       #define DS_WRITE_MOSTLY 4
+       #define DS_SPARE        8
+       #define DS_BLOCKED      16
+       #define DS_REMOVE       1024
        int prev_state, curr_state, next_state;
 
 };
@@ -408,7 +414,7 @@ extern struct superswitch {
        void (*mark_clean)(struct active_array *a, unsigned long long sync_pos);
        void (*mark_dirty)(struct active_array *a);
        void (*mark_sync)(struct active_array *a, unsigned long long resync);
-       void (*set_disk)(struct active_array *a, int n);
+       void (*set_disk)(struct active_array *a, int n, int state);
        void (*sync_metadata)(struct active_array *a);
 
 
index 9e98aeb50c89b5c5b45964ff705ff9014730daf4..98d0219a8125918c1dbbddc32200498a3a44edcb 100644 (file)
--- a/monitor.c
+++ b/monitor.c
@@ -123,12 +123,6 @@ static enum sync_action read_action( int fd)
        return (enum sync_action) match_word(buf, sync_actions);
 }
 
-#define DS_FAULTY      1
-#define        DS_INSYNC       2
-#define        DS_WRITE_MOSTLY 4
-#define        DS_SPARE        8
-#define        DS_REMOVE       1024
-
 int read_dev_state(int fd)
 {
        char buf[60];
@@ -141,14 +135,16 @@ int read_dev_state(int fd)
 
        cp = buf;
        while (cp) {
-               if (attr_match("faulty", cp))
+               if (attr_match(cp, "faulty"))
                        rv |= DS_FAULTY;
-               if (attr_match("in_sync", cp))
+               if (attr_match(cp, "in_sync"))
                        rv |= DS_INSYNC;
-               if (attr_match("write_mostly", cp))
+               if (attr_match(cp, "write_mostly"))
                        rv |= DS_WRITE_MOSTLY;
-               if (attr_match("spare", cp))
+               if (attr_match(cp, "spare"))
                        rv |= DS_SPARE;
+               if (attr_match(cp, "blocked"))
+                       rv |= DS_BLOCKED;
                cp = strchr(cp, ',');
                if (cp)
                        cp++;
@@ -177,8 +173,9 @@ int read_dev_state(int fd)
  *
  *  device fails
  *    detected by rd-N/state reporting "faulty"
- *    mark device as 'failed' in metadata, the remove device
- *    by writing 'remove' to rd/state.
+ *    mark device as 'failed' in metadata, let the kernel release the
+ *    device by writing '-blocked' to rd/state, and finally write 'remove' to
+ *    rd/state
  *
  *  sync completes
  *    sync_action was 'resync' and becomes 'idle' and resync_start becomes
@@ -238,7 +235,8 @@ static int read_and_act(struct active_array *a)
        a->curr_action = read_action(a->action_fd);
        for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
                mdi->next_state = 0;
-               mdi->curr_state = read_dev_state(mdi->state_fd);
+               if (mdi->state_fd > 0)
+                       mdi->curr_state = read_dev_state(mdi->state_fd);
        }
 
        if (a->curr_state <= inactive &&
@@ -285,7 +283,8 @@ static int read_and_act(struct active_array *a)
        if (a->curr_action == idle &&
            a->prev_action == recover) {
                for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
-                       a->container->ss->set_disk(a, mdi->disk.raid_disk);
+                       a->container->ss->set_disk(a, mdi->disk.raid_disk,
+                                                  mdi->curr_state);
                        if (! (mdi->curr_state & DS_INSYNC))
                                check_degraded = 1;
                }
@@ -294,7 +293,8 @@ static int read_and_act(struct active_array *a)
 
        for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
                if (mdi->curr_state & DS_FAULTY) {
-                       a->container->ss->set_disk(a, mdi->disk.raid_disk);
+                       a->container->ss->set_disk(a, mdi->disk.raid_disk,
+                                                  mdi->curr_state);
                        check_degraded = 1;
                        mdi->next_state = DS_REMOVE;
                }
@@ -312,8 +312,20 @@ static int read_and_act(struct active_array *a)
        if (a->next_action != bad_action)
                write_attr(sync_actions[a->next_action], a->action_fd);
        for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
-               if (mdi->next_state == DS_REMOVE)
-                       write_attr("remove", mdi->state_fd);
+               if (mdi->next_state == DS_REMOVE && mdi->state_fd > 0) {
+                       int remove_err;
+
+                       write_attr("-blocked", mdi->state_fd);
+                       /* the kernel may not be able to immediately remove the
+                        * disk, we can simply wait until the next event to try
+                        * again.
+                        */
+                       remove_err = write_attr("remove", mdi->state_fd);
+                       if (!remove_err) {
+                               close(mdi->state_fd);
+                               mdi->state_fd = -1;
+                       }
+               }
                if (mdi->next_state & DS_INSYNC)
                        write_attr("+in_sync", mdi->state_fd);
        }
index 816a9753e423904fac3d2aece1c92f825e943745..10647f32eacbc4ea65c8a99c5ca1b9120bf96d70 100644 (file)
@@ -2464,7 +2464,7 @@ static void ddf_mark_sync(struct active_array *a, unsigned long long resync)
        fprintf(stderr, "ddf: mark sync\n");
 }
 
-static void ddf_set_disk(struct active_array *a, int n)
+static void ddf_set_disk(struct active_array *a, int n, int state)
 {
        fprintf(stderr, "ddf: set_disk %d\n", n);
 }
index 7ffcff82a0a195da7875f839aa67f079c5954a11..a72d0de4f136ab5bb3f5efe5a1d0d305212d56eb 100644 (file)
@@ -1251,9 +1251,48 @@ static void imsm_mark_sync(struct active_array *a, unsigned long long resync)
        }
 }
 
-static void imsm_set_disk(struct active_array *a, int n)
+static void imsm_set_disk(struct active_array *a, int n, int state)
 {
-       fprintf(stderr, "imsm: set_disk %d\n", n);
+       int inst = a->info.container_member;
+       struct intel_super *super = a->container->sb;
+       struct imsm_dev *dev = get_imsm_dev(super->mpb, inst);
+       struct imsm_map *map = dev->vol.map;
+       struct imsm_disk *disk;
+       __u32 status;
+       int failed = 0;
+       int new_failure = 0;
+
+       if (n > map->num_members)
+               fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
+                       n, map->num_members - 1);
+
+       if (n < 0)
+               return;
+
+       fprintf(stderr, "imsm: set_disk %d:%x\n", n, state);
+
+       disk = get_imsm_disk(super->mpb, get_imsm_disk_idx(map, n));
+
+       /* check if we have seen this failure before */
+       status = __le32_to_cpu(disk->status);
+       if ((state & DS_FAULTY) && !(status & FAILED_DISK)) {
+               status |= FAILED_DISK;
+               disk->status = __cpu_to_le32(status);
+               new_failure = 1;
+       }
+
+       /**
+        * the number of failures have changed, count up 'failed' to determine
+        * degraded / failed status
+        */
+       if (new_failure && map->map_state != IMSM_T_STATE_FAILED)
+               failed = imsm_count_failed(super->mpb, map);
+
+       if (failed)
+               map->map_state = imsm_check_degraded(super->mpb, inst, failed);
+
+       if (new_failure)
+               super->updates_pending++;
 }
 
 static int store_imsm_mpb(int fd, struct intel_super *super)