From: NeilBrown <neilb@suse.de>
Subject: Disable recovery when degraded RAID1 array appears to be faulty.
Patch-mainline: no
References: bnc#447835


If a raid1 array has only one working drive and that drive has a sector
which gives an error on read, then an attempt to recover onto a spare
will fail.  As the single remaining drive is not removed from the
array, the recovery is immediately re-attempted, resulting in an
infinite recovery loop.

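So detect this situation and don't retry recovery once an error
on the lone remaining drive is detected: raid1's error handler sets
a new MD_RECOVERY_DISABLED flag in mddev->recovery, and
remove_and_add_spares() does not activate spares while it is set.

The flag is bit 16 of mddev->recovery.  The places in md.c which
used to clear or test the whole word now only touch the low 16 bits
(the existing per-recovery state bits), so the flag is preserved
when that state is reset.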

Allow recovery to be retried once every time a spare is added
in case the problem wasn't actually a media error.
    

Signed-off-by: Neil Brown <neilb@suse.de>

---
 drivers/md/md.c           |   11 ++++++-----
 drivers/md/raid1.c        |    8 ++++++--
 include/linux/raid/md_k.h |    5 +++++
 3 files changed, 17 insertions(+), 7 deletions(-)

--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3071,7 +3071,7 @@ action_store(mddev_t *mddev, const char
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 			md_unregister_thread(mddev->sync_thread);
 			mddev->sync_thread = NULL;
-			mddev->recovery = 0;
+			mddev->recovery &= ~65535;
 		}
 	} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
 		   test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
@@ -4528,7 +4528,7 @@ static int set_bitmap_file(mddev_t *mdde
 	if (mddev->pers) {
 		if (!mddev->pers->quiesce)
 			return -EBUSY;
-		if (mddev->recovery || mddev->sync_thread)
+		if ((mddev->recovery & 65535) || mddev->sync_thread)
 			return -EBUSY;
 		/* we should be able to change the bitmap.. */
 	}
@@ -4783,7 +4783,7 @@ static int update_array_info(mddev_t *md
 	if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
 		if (mddev->pers->quiesce == NULL)
 			return -EINVAL;
-		if (mddev->recovery || mddev->sync_thread)
+		if ((mddev->recovery & 65535) || mddev->sync_thread)
 			return -EBUSY;
 		if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
 			/* add the bitmap */
@@ -6048,7 +6048,8 @@ static int remove_and_add_spares(mddev_t
 			}
 		}
 
-	if (mddev->degraded && ! mddev->ro) {
+	if (mddev->degraded && ! mddev->ro &&
+	    !test_bit(MD_RECOVERY_DISABLED, &mddev->recovery)) {
 		rdev_for_each(rdev, rtmp, mddev) {
 			if (rdev->raid_disk >= 0 &&
 			    !test_bit(In_sync, &rdev->flags) &&
@@ -6200,7 +6201,7 @@ void md_check_recovery(mddev_t *mddev)
 				rdev_for_each(rdev, rtmp, mddev)
 					rdev->saved_raid_disk = -1;
 
-			mddev->recovery = 0;
+			mddev->recovery &= ~65535;
 			/* flag recovery needed just to double check */
 			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 			sysfs_notify(&mddev->kobj, NULL, "sync_action");
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1012,12 +1012,16 @@ static void error(mddev_t *mddev, mdk_rd
 	 * else mark the drive as failed
 	 */
 	if (test_bit(In_sync, &rdev->flags)
-	    && (conf->raid_disks - mddev->degraded) == 1)
+	    && (conf->raid_disks - mddev->degraded) == 1) {
 		/*
 		 * Don't fail the drive, act as though we were just a
-		 * normal single drive
+		 * normal single drive.
+		 * Disable any future recovery attempts as they will
+		 * likely hit an error on this device.
 		 */
+		set_bit(MD_RECOVERY_DISABLED, &mddev->recovery);
 		return;
+	}
 	if (test_and_clear_bit(In_sync, &rdev->flags)) {
 		unsigned long flags;
 		spin_lock_irqsave(&conf->device_lock, flags);
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -200,6 +200,8 @@ struct mddev_s
 	 * RESHAPE:  A reshape is happening
 	 *
 	 * If neither SYNC or RESHAPE are set, then it is a recovery.
+	 *
+	 * DISABLED: read error on degraded array makes recovery impossible.
 	 */
 #define	MD_RECOVERY_RUNNING	0
 #define	MD_RECOVERY_SYNC	1
@@ -212,6 +214,9 @@ struct mddev_s
 #define MD_RECOVERY_RESHAPE	8
 #define	MD_RECOVERY_FROZEN	9
 
+
+#define	MD_RECOVERY_DISABLED	16
+
 	unsigned long			recovery;
 
 	int				in_sync;	/* know to not need resync */