git.ipfire.org Git - people/pmueller/ipfire-2.x.git/blame - src/patches/suse-2.6.27.25/patches.fixes/md-disable-recovery-on-faulty-degraded-array
Changed checkfs to auto reboot after correctable fsck fixes.
[people/pmueller/ipfire-2.x.git] / src / patches / suse-2.6.27.25 / patches.fixes / md-disable-recovery-on-faulty-degraded-array
CommitLineData
00e5a55c
BS
1From: NeilBrown <neilb@suse.de>
2Subject: Disable recovery when degraded RAID1 array appears to be faulty.
3Patch-mainline: no
4References: bnc#447835
5
6
If a raid1 array has only one working drive, and that drive has a sector
which returns an error on read, then an attempt to recover onto a spare
will fail. Because the single remaining drive is not removed from the
array, the recovery is immediately re-attempted, resulting in an
infinite recovery loop.
12
So detect this situation and do not retry recovery once an error
on the lone remaining drive has been detected.
15
Allow recovery to be retried once each time a spare is added,
in case the problem wasn't actually a media error.
18
19
20Signed-off-by: Neil Brown <neilb@suse.de>
21
22---
23 drivers/md/md.c | 11 ++++++-----
24 drivers/md/raid1.c | 8 ++++++--
25 include/linux/raid/md_k.h | 5 +++++
26 3 files changed, 17 insertions(+), 7 deletions(-)
27
28--- a/drivers/md/md.c
29+++ b/drivers/md/md.c
30@@ -3071,7 +3071,7 @@ action_store(mddev_t *mddev, const char
31 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
32 md_unregister_thread(mddev->sync_thread);
33 mddev->sync_thread = NULL;
34- mddev->recovery = 0;
35+ mddev->recovery &= ~65535;
36 }
37 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
38 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
39@@ -4528,7 +4528,7 @@ static int set_bitmap_file(mddev_t *mdde
40 if (mddev->pers) {
41 if (!mddev->pers->quiesce)
42 return -EBUSY;
43- if (mddev->recovery || mddev->sync_thread)
44+ if ((mddev->recovery & 65535) || mddev->sync_thread)
45 return -EBUSY;
46 /* we should be able to change the bitmap.. */
47 }
48@@ -4783,7 +4783,7 @@ static int update_array_info(mddev_t *md
49 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
50 if (mddev->pers->quiesce == NULL)
51 return -EINVAL;
52- if (mddev->recovery || mddev->sync_thread)
53+ if ((mddev->recovery & 65535) || mddev->sync_thread)
54 return -EBUSY;
55 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
56 /* add the bitmap */
57@@ -6048,7 +6048,8 @@ static int remove_and_add_spares(mddev_t
58 }
59 }
60
61- if (mddev->degraded && ! mddev->ro) {
62+ if (mddev->degraded && ! mddev->ro &&
63+ !test_bit(MD_RECOVERY_DISABLED, &mddev->recovery)) {
64 rdev_for_each(rdev, rtmp, mddev) {
65 if (rdev->raid_disk >= 0 &&
66 !test_bit(In_sync, &rdev->flags) &&
67@@ -6200,7 +6201,7 @@ void md_check_recovery(mddev_t *mddev)
68 rdev_for_each(rdev, rtmp, mddev)
69 rdev->saved_raid_disk = -1;
70
71- mddev->recovery = 0;
72+ mddev->recovery &= ~65535;
73 /* flag recovery needed just to double check */
74 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
75 sysfs_notify(&mddev->kobj, NULL, "sync_action");
76--- a/drivers/md/raid1.c
77+++ b/drivers/md/raid1.c
78@@ -1012,12 +1012,16 @@ static void error(mddev_t *mddev, mdk_rd
79 * else mark the drive as failed
80 */
81 if (test_bit(In_sync, &rdev->flags)
82- && (conf->raid_disks - mddev->degraded) == 1)
83+ && (conf->raid_disks - mddev->degraded) == 1) {
84 /*
85 * Don't fail the drive, act as though we were just a
86- * normal single drive
87+ * normal single drive.
88+ * Disable any future recovery attempts as they will
89+ * likely hit an error on this device.
90 */
91+ set_bit(MD_RECOVERY_DISABLED, &mddev->recovery);
92 return;
93+ }
94 if (test_and_clear_bit(In_sync, &rdev->flags)) {
95 unsigned long flags;
96 spin_lock_irqsave(&conf->device_lock, flags);
97--- a/include/linux/raid/md_k.h
98+++ b/include/linux/raid/md_k.h
99@@ -200,6 +200,8 @@ struct mddev_s
100 * RESHAPE: A reshape is happening
101 *
102 * If neither SYNC or RESHAPE are set, then it is a recovery.
103+ *
104+ * DISABLED: read error on degraded array makes recovery impossible.
105 */
106 #define MD_RECOVERY_RUNNING 0
107 #define MD_RECOVERY_SYNC 1
108@@ -212,6 +214,9 @@ struct mddev_s
109 #define MD_RECOVERY_RESHAPE 8
110 #define MD_RECOVERY_FROZEN 9
111
112+
113+#define MD_RECOVERY_DISABLED 16
114+
115 unsigned long recovery;
116
117 int in_sync; /* know to not need resync */