From 7452f22c88b61cf1d9d0db98b6e65100f92d5107 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 14 Sep 2014 20:49:19 -0700 Subject: [PATCH] 3.10-stable patches added patches: md-raid1-raid10-always-abort-recover-on-write-error.patch --- ...-always-abort-recover-on-write-error.patch | 78 +++++++++++++++++++ queue-3.10/series | 1 + 2 files changed, 79 insertions(+) create mode 100644 queue-3.10/md-raid1-raid10-always-abort-recover-on-write-error.patch diff --git a/queue-3.10/md-raid1-raid10-always-abort-recover-on-write-error.patch b/queue-3.10/md-raid1-raid10-always-abort-recover-on-write-error.patch new file mode 100644 index 00000000000..72e034ec7b6 --- /dev/null +++ b/queue-3.10/md-raid1-raid10-always-abort-recover-on-write-error.patch @@ -0,0 +1,78 @@ +From 2446dba03f9dabe0b477a126cbeb377854785b47 Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Thu, 31 Jul 2014 10:16:29 +1000 +Subject: md/raid1,raid10: always abort recover on write error. + +From: NeilBrown + +commit 2446dba03f9dabe0b477a126cbeb377854785b47 upstream. + +Currently we don't abort recovery on a write error if the write error +to the recovering device was triggered by normal IO (as opposed to +recovery IO). + +This means that for one bitmap region, the recovery might write to the +recovering device for a few sectors, then not bother for subsequent +sectors (as it never writes to failed devices). In this case +the bitmap bit will be cleared, but it really shouldn't. + +The result is that if the recovering device fails and is then re-added +(after fixing whatever hardware problem triggered the failure), +the second recovery won't redo the region it was in the middle of, +so some of the device will not be recovered properly. + +If we abort the recovery, the region being processed will be cancelled +(bit not cleared) and the whole region will be retried. + +As the bug can result in data corruption the patch is suitable for +-stable. 
For kernels prior to 3.11 there is a conflict in raid10.c +which will require care. + +Original-from: jiao hui +Reported-and-tested-by: jiao hui +Signed-off-by: NeilBrown +Signed-off-by: Greg Kroah-Hartman + + +--- + drivers/md/raid1.c | 8 ++++---- + drivers/md/raid10.c | 8 ++++---- + 2 files changed, 8 insertions(+), 8 deletions(-) + +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -1681,11 +1681,11 @@ static void error(struct mddev *mddev, s + spin_lock_irqsave(&conf->device_lock, flags); + mddev->degraded++; + spin_unlock_irqrestore(&conf->device_lock, flags); +- /* +- * if recovery is running, make sure it aborts. +- */ +- set_bit(MD_RECOVERY_INTR, &mddev->recovery); + } ++ /* ++ * If recovery is running, make sure it aborts. ++ */ ++ set_bit(MD_RECOVERY_INTR, &mddev->recovery); + set_bit(Blocked, &rdev->flags); + set_bit(Faulty, &rdev->flags); + set_bit(MD_CHANGE_DEVS, &mddev->flags); +--- a/drivers/md/raid1.c ++++ b/drivers/md/raid1.c +@@ -1406,12 +1406,12 @@ static void error(struct mddev *mddev, s + mddev->degraded++; + set_bit(Faulty, &rdev->flags); + spin_unlock_irqrestore(&conf->device_lock, flags); +- /* +- * if recovery is running, make sure it aborts. +- */ +- set_bit(MD_RECOVERY_INTR, &mddev->recovery); + } else + set_bit(Faulty, &rdev->flags); ++ /* ++ * if recovery is running, make sure it aborts. ++ */ ++ set_bit(MD_RECOVERY_INTR, &mddev->recovery); + set_bit(MD_CHANGE_DEVS, &mddev->flags); + printk(KERN_ALERT + "md/raid1:%s: Disk failure on %s, disabling device.\n" diff --git a/queue-3.10/series b/queue-3.10/series index 47d761bd968..ae86dba1134 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -52,3 +52,4 @@ xfs-quotacheck-leaves-dquot-buffers-without-verifiers.patch xfs-don-t-dirty-buffers-beyond-eof.patch xfs-don-t-zero-partial-page-cache-pages-during-o_direct-writes.patch xfs-don-t-zero-partial-page-cache-pages-during.patch +md-raid1-raid10-always-abort-recover-on-write-error.patch -- 2.47.3