git.ipfire.org Git - people/pmueller/ipfire-2.x.git/blobdiff - src/patches/suse-2.6.27.31/patches.fixes/md-disable-recovery-on-faulty-degraded-array
Move xen patchset to new version's subdir.
[people/pmueller/ipfire-2.x.git] / src / patches / suse-2.6.27.31 / patches.fixes / md-disable-recovery-on-faulty-degraded-array
diff --git a/src/patches/suse-2.6.27.31/patches.fixes/md-disable-recovery-on-faulty-degraded-array b/src/patches/suse-2.6.27.31/patches.fixes/md-disable-recovery-on-faulty-degraded-array
new file mode 100644 (file)
index 0000000..904fefb
--- /dev/null
@@ -0,0 +1,117 @@
+From: NeilBrown <neilb@suse.de>
+Subject: Disable recovery when degraded RAID1 array appears to be faulty.
+Patch-mainline: no
+References: bnc#447835
+
+
+If a RAID1 array has only one working drive, and that drive has a
+sector which gives an error on read, then an attempt to recover onto
+a spare will fail.  However, as the single remaining drive is not
+removed from the array, the recovery will be immediately re-attempted,
+resulting in an infinite recovery loop.
+
+So detect this situation and don't retry recovery once an error
+on the lone remaining drive is detected.
+
+Allow recovery to be retried once each time a spare is added,
+in case the problem wasn't actually a media error.
+
+
+Signed-off-by: Neil Brown <neilb@suse.de>
+
+---
+ drivers/md/md.c           |   11 ++++++-----
+ drivers/md/raid1.c        |    8 ++++++--
+ include/linux/raid/md_k.h |    5 +++++
+ 3 files changed, 17 insertions(+), 7 deletions(-)
+
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -3071,7 +3071,7 @@ action_store(mddev_t *mddev, const char
+                       set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+                       md_unregister_thread(mddev->sync_thread);
+                       mddev->sync_thread = NULL;
+-                      mddev->recovery = 0;
++                      mddev->recovery &= ~65535;
+               }
+       } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+                  test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
+@@ -4528,7 +4528,7 @@ static int set_bitmap_file(mddev_t *mdde
+       if (mddev->pers) {
+               if (!mddev->pers->quiesce)
+                       return -EBUSY;
+-              if (mddev->recovery || mddev->sync_thread)
++              if ((mddev->recovery & 65535) || mddev->sync_thread)
+                       return -EBUSY;
+               /* we should be able to change the bitmap.. */
+       }
+@@ -4783,7 +4783,7 @@ static int update_array_info(mddev_t *md
+       if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
+               if (mddev->pers->quiesce == NULL)
+                       return -EINVAL;
+-              if (mddev->recovery || mddev->sync_thread)
++              if ((mddev->recovery & 65535) || mddev->sync_thread)
+                       return -EBUSY;
+               if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
+                       /* add the bitmap */
+@@ -6048,7 +6048,8 @@ static int remove_and_add_spares(mddev_t
+                       }
+               }
+-      if (mddev->degraded && ! mddev->ro) {
++      if (mddev->degraded && ! mddev->ro &&
++          !test_bit(MD_RECOVERY_DISABLED, &mddev->recovery)) {
+               rdev_for_each(rdev, rtmp, mddev) {
+                       if (rdev->raid_disk >= 0 &&
+                           !test_bit(In_sync, &rdev->flags) &&
+@@ -6200,7 +6201,7 @@ void md_check_recovery(mddev_t *mddev)
+                               rdev_for_each(rdev, rtmp, mddev)
+                                       rdev->saved_raid_disk = -1;
+-                      mddev->recovery = 0;
++                      mddev->recovery &= ~65535;
+                       /* flag recovery needed just to double check */
+                       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+                       sysfs_notify(&mddev->kobj, NULL, "sync_action");
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -1012,12 +1012,16 @@ static void error(mddev_t *mddev, mdk_rd
+        * else mark the drive as failed
+        */
+       if (test_bit(In_sync, &rdev->flags)
+-          && (conf->raid_disks - mddev->degraded) == 1)
++          && (conf->raid_disks - mddev->degraded) == 1) {
+               /*
+                * Don't fail the drive, act as though we were just a
+-               * normal single drive
++               * normal single drive.
++               * Disable any future recovery attempts as they will
++               * likely hit an error on this device.
+                */
++              set_bit(MD_RECOVERY_DISABLED, &mddev->recovery);
+               return;
++      }
+       if (test_and_clear_bit(In_sync, &rdev->flags)) {
+               unsigned long flags;
+               spin_lock_irqsave(&conf->device_lock, flags);
+--- a/include/linux/raid/md_k.h
++++ b/include/linux/raid/md_k.h
+@@ -200,6 +200,8 @@ struct mddev_s
+        * RESHAPE:  A reshape is happening
+        *
+        * If neither SYNC or RESHAPE are set, then it is a recovery.
++       *
++       * DISABLED: read error on degraded array makes recovery impossible.
+        */
+ #define       MD_RECOVERY_RUNNING     0
+ #define       MD_RECOVERY_SYNC        1
+@@ -212,6 +214,9 @@ struct mddev_s
+ #define MD_RECOVERY_RESHAPE   8
+ #define       MD_RECOVERY_FROZEN      9
++
++#define       MD_RECOVERY_DISABLED    16
++
+       unsigned long                   recovery;
+       int                             in_sync;        /* know to not need resync */