]>
Commit | Line | Data |
---|---|---|
00e5a55c BS |
1 | From: NeilBrown <neilb@suse.de> |
2 | Subject: Disable recovery when degraded RAID1 array appears to be faulty. | |
3 | Patch-mainline: no | |
4 | References: bnc#447835 | |
5 | ||
6 | ||
7 | If a raid1 has only one working drive and that drive has a sector which | |
8 | gives an error on read, then an attempt to recover onto a spare will | |
9 | fail, but as the single remaining drive is not removed from the | |
10 | array, the recovery will be immediately re-attempted, resulting | |
11 | in an infinite recovery loop. | |
12 | ||
13 | So detect this situation and don't retry recovery once an error | |
14 | on the lone remaining drive is detected. | |
15 | ||
16 | Allow one further recovery attempt each time a spare is added, | |
17 | in case the problem wasn't actually a media error. | |
18 | ||
19 | ||
20 | Signed-off-by: Neil Brown <neilb@suse.de> | |
21 | ||
22 | --- | |
23 | drivers/md/md.c | 11 ++++++----- | |
24 | drivers/md/raid1.c | 8 ++++++-- | |
25 | include/linux/raid/md_k.h | 5 +++++ | |
26 | 3 files changed, 17 insertions(+), 7 deletions(-) | |
27 | ||
28 | --- a/drivers/md/md.c | |
29 | +++ b/drivers/md/md.c | |
30 | @@ -3071,7 +3071,7 @@ action_store(mddev_t *mddev, const char | |
31 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | |
32 | md_unregister_thread(mddev->sync_thread); | |
33 | mddev->sync_thread = NULL; | |
34 | - mddev->recovery = 0; | |
35 | + mddev->recovery &= ~65535; | |
36 | } | |
37 | } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || | |
38 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) | |
39 | @@ -4528,7 +4528,7 @@ static int set_bitmap_file(mddev_t *mdde | |
40 | if (mddev->pers) { | |
41 | if (!mddev->pers->quiesce) | |
42 | return -EBUSY; | |
43 | - if (mddev->recovery || mddev->sync_thread) | |
44 | + if ((mddev->recovery & 65535) || mddev->sync_thread) | |
45 | return -EBUSY; | |
46 | /* we should be able to change the bitmap.. */ | |
47 | } | |
48 | @@ -4783,7 +4783,7 @@ static int update_array_info(mddev_t *md | |
49 | if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) { | |
50 | if (mddev->pers->quiesce == NULL) | |
51 | return -EINVAL; | |
52 | - if (mddev->recovery || mddev->sync_thread) | |
53 | + if ((mddev->recovery & 65535) || mddev->sync_thread) | |
54 | return -EBUSY; | |
55 | if (info->state & (1<<MD_SB_BITMAP_PRESENT)) { | |
56 | /* add the bitmap */ | |
57 | @@ -6048,7 +6048,8 @@ static int remove_and_add_spares(mddev_t | |
58 | } | |
59 | } | |
60 | ||
61 | - if (mddev->degraded && ! mddev->ro) { | |
62 | + if (mddev->degraded && ! mddev->ro && | |
63 | + !test_bit(MD_RECOVERY_DISABLED, &mddev->recovery)) { | |
64 | rdev_for_each(rdev, rtmp, mddev) { | |
65 | if (rdev->raid_disk >= 0 && | |
66 | !test_bit(In_sync, &rdev->flags) && | |
67 | @@ -6200,7 +6201,7 @@ void md_check_recovery(mddev_t *mddev) | |
68 | rdev_for_each(rdev, rtmp, mddev) | |
69 | rdev->saved_raid_disk = -1; | |
70 | ||
71 | - mddev->recovery = 0; | |
72 | + mddev->recovery &= ~65535; | |
73 | /* flag recovery needed just to double check */ | |
74 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | |
75 | sysfs_notify(&mddev->kobj, NULL, "sync_action"); | |
76 | --- a/drivers/md/raid1.c | |
77 | +++ b/drivers/md/raid1.c | |
78 | @@ -1012,12 +1012,16 @@ static void error(mddev_t *mddev, mdk_rd | |
79 | * else mark the drive as failed | |
80 | */ | |
81 | if (test_bit(In_sync, &rdev->flags) | |
82 | - && (conf->raid_disks - mddev->degraded) == 1) | |
83 | + && (conf->raid_disks - mddev->degraded) == 1) { | |
84 | /* | |
85 | * Don't fail the drive, act as though we were just a | |
86 | - * normal single drive | |
87 | + * normal single drive. | |
88 | + * Disable any future recovery attempts as they will | |
89 | + * likely hit an error on this device. | |
90 | */ | |
91 | + set_bit(MD_RECOVERY_DISABLED, &mddev->recovery); | |
92 | return; | |
93 | + } | |
94 | if (test_and_clear_bit(In_sync, &rdev->flags)) { | |
95 | unsigned long flags; | |
96 | spin_lock_irqsave(&conf->device_lock, flags); | |
97 | --- a/include/linux/raid/md_k.h | |
98 | +++ b/include/linux/raid/md_k.h | |
99 | @@ -200,6 +200,8 @@ struct mddev_s | |
100 | * RESHAPE: A reshape is happening | |
101 | * | |
102 | * If neither SYNC or RESHAPE are set, then it is a recovery. | |
103 | + * | |
104 | + * DISABLED: read error on degraded array makes recovery impossible. | |
105 | */ | |
106 | #define MD_RECOVERY_RUNNING 0 | |
107 | #define MD_RECOVERY_SYNC 1 | |
108 | @@ -212,6 +214,9 @@ struct mddev_s | |
109 | #define MD_RECOVERY_RESHAPE 8 | |
110 | #define MD_RECOVERY_FROZEN 9 | |
111 | ||
112 | + | |
113 | +#define MD_RECOVERY_DISABLED 16 | |
114 | + | |
115 | unsigned long recovery; | |
116 | ||
117 | int in_sync; /* know to not need resync */ |