From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 27 Jan 2014 20:06:52 +0000 (-0800)
Subject: 3.13-stable patches
X-Git-Tag: v3.13.1~2
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5acbff361fe361f246d763faf9772b6fc2e637ac;p=thirdparty%2Fkernel%2Fstable-queue.git

3.13-stable patches

added patches:
	md-raid5-close-recently-introduced-race-in-stripe_head-management.patch
	md-raid5-fix-long-standing-problem-with-bitmap-handling-on-write-failure.patch
---

diff --git a/queue-3.13/md-raid5-close-recently-introduced-race-in-stripe_head-management.patch b/queue-3.13/md-raid5-close-recently-introduced-race-in-stripe_head-management.patch
new file mode 100644
index 00000000000..89dfcd9bd08
--- /dev/null
+++ b/queue-3.13/md-raid5-close-recently-introduced-race-in-stripe_head-management.patch
@@ -0,0 +1,66 @@
+From 7da9d450ab2843bf1db378c156acc6304dbc1c2b Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Wed, 22 Jan 2014 11:45:03 +1100
+Subject: md/raid5: close recently introduced race in stripe_head management.
+
+From: NeilBrown <neilb@suse.de>
+
+commit 7da9d450ab2843bf1db378c156acc6304dbc1c2b upstream.
+
+As release_stripe and __release_stripe decrement ->count and then
+manipulate ->lru both under ->device_lock, it is important that
+get_active_stripe() increments ->count and clears ->lru also under
+->device_lock.
+
+However we currently list_del_init ->lru under the lock, but increment
+the ->count outside the lock.  This can lead to races and list
+corruption.
+
+So move the atomic_inc(&sh->count) up inside the ->device_lock
+protected region.
+
+Note that we still increment ->count without device lock in the case
+where get_free_stripe() was called, and in fact don't take
+->device_lock at all in that path.
+This is safe because if the stripe_head can be found by
+get_free_stripe, then the hash lock assures us that no-one else could
+possibly be calling release_stripe() at the same time.
+
+Fixes: 566c09c53455d7c4f1130928ef8071da1a24ea65
+Reported-and-tested-by: Ian Kumlien <ian.kumlien@gmail.com>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/raid5.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -675,8 +675,10 @@ get_active_stripe(struct r5conf *conf, s
+ 					 || !conf->inactive_blocked),
+ 					*(conf->hash_locks + hash));
+ 				conf->inactive_blocked = 0;
+-			} else
++			} else {
+ 				init_stripe(sh, sector, previous);
++				atomic_inc(&sh->count);
++			}
+ 		} else {
+ 			spin_lock(&conf->device_lock);
+ 			if (atomic_read(&sh->count)) {
+@@ -695,13 +697,11 @@ get_active_stripe(struct r5conf *conf, s
+ 					sh->group = NULL;
+ 				}
+ 			}
++			atomic_inc(&sh->count);
+ 			spin_unlock(&conf->device_lock);
+ 		}
+ 	} while (sh == NULL);
+ 
+-	if (sh)
+-		atomic_inc(&sh->count);
+-
+ 	spin_unlock_irq(conf->hash_locks + hash);
+ 	return sh;
+ }
diff --git a/queue-3.13/md-raid5-fix-long-standing-problem-with-bitmap-handling-on-write-failure.patch b/queue-3.13/md-raid5-fix-long-standing-problem-with-bitmap-handling-on-write-failure.patch
new file mode 100644
index 00000000000..9bc4f4cc710
--- /dev/null
+++ b/queue-3.13/md-raid5-fix-long-standing-problem-with-bitmap-handling-on-write-failure.patch
@@ -0,0 +1,46 @@
+From 9f97e4b128d2ea90a5f5063ea0ee3b0911f4c669 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Thu, 16 Jan 2014 09:35:38 +1100
+Subject: md/raid5: fix long-standing problem with bitmap handling on write failure.
+
+From: NeilBrown <neilb@suse.de>
+
+commit 9f97e4b128d2ea90a5f5063ea0ee3b0911f4c669 upstream.
+
+Before a write starts we set a bit in the write-intent bitmap.
+When the write completes we clear that bit if the write was successful
+to all devices.  However if the write wasn't fully successful we
+should not clear the bit.  If the faulty drive is subsequently
+re-added, the fact that the bit is still set ensures that we will
+re-write the data that is missing.
+
+This logic is mediated by the STRIPE_DEGRADED flag - we only clear the
+bitmap bit when this flag is not set.
+Currently we correctly set the flag if a write starts when some
+devices are failed or missing.  But we do *not* set the flag if some
+device failed during the write attempt.
+This is wrong and can result in clearing the bit inappropriately.
+
+So: set the flag when a write fails.
+
+This bug has been present since bitmaps were introduced, so the fix is
+suitable for any -stable kernel.
+
+Reported-by: Ethan Wilson <ethan.wilson@shiftmail.org>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/raid5.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -2111,6 +2111,7 @@ static void raid5_end_write_request(stru
+ 			set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
+ 	} else {
+ 		if (!uptodate) {
++			set_bit(STRIPE_DEGRADED, &sh->state);
+ 			set_bit(WriteErrorSeen, &rdev->flags);
+ 			set_bit(R5_WriteError, &sh->dev[i].flags);
+ 			if (!test_and_set_bit(WantReplacement, &rdev->flags))
diff --git a/queue-3.13/series b/queue-3.13/series
index 565d77b1948..40cc479e646 100644
--- a/queue-3.13/series
+++ b/queue-3.13/series
@@ -6,3 +6,5 @@ mm-make-set-page_address-static-inline-if-want_page_virtual.patch
 serial-amba-pl011-use-port-lock-to-guard-control-register-access.patch
 extcon-gpio-request-gpio-pin-before-modifying-its-state.patch
 alsa-hda-explicitly-keep-codec-powered-up-in-hdmi_present_sense.patch
+md-raid5-fix-long-standing-problem-with-bitmap-handling-on-write-failure.patch
+md-raid5-close-recently-introduced-race-in-stripe_head-management.patch