git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
more patches added
author Greg Kroah-Hartman <gregkh@suse.de>
Wed, 6 Sep 2006 21:13:44 +0000 (14:13 -0700)
committer Greg Kroah-Hartman <gregkh@suse.de>
Wed, 6 Sep 2006 21:13:44 +0000 (14:13 -0700)
queue-2.6.17/dm-fix-deadlock-under-high-i-o-load-in-raid1-setup.patch [new file with mode: 0644]
queue-2.6.17/remove-redundant-up-in-stop_machine.patch [new file with mode: 0644]
queue-2.6.17/series

diff --git a/queue-2.6.17/dm-fix-deadlock-under-high-i-o-load-in-raid1-setup.patch b/queue-2.6.17/dm-fix-deadlock-under-high-i-o-load-in-raid1-setup.patch
new file mode 100644 (file)
index 0000000..ef85ebc
--- /dev/null
@@ -0,0 +1,59 @@
+From stable-bounces@linux.kernel.org Sun Aug 27 01:24:04 2006
+Message-Id: <200608270823.k7R8NO3M020753@shell0.pdx.osdl.net>
+To: torvalds@osdl.org
+From: akpm@osdl.org
+Date: Sun, 27 Aug 2006 01:23:24 -0700
+Cc: akpm@osdl.org, kobras@linux.de, stable@kernel.org, agk@redhat.com
+Subject: dm: Fix deadlock under high i/o load in raid1 setup.
+
+From: Daniel Kobras <kobras@linux.de>
+
+On an nForce4-equipped machine with two SATA disks in raid1 setup using dmraid,
+we experienced frequent deadlocks of the system under high i/o load.  'cat
+/dev/zero > ~/zero' was the most reliable way to reproduce them: Randomly
+after a few GB, 'cp' would be left in 'D' state along with kjournald and
+kmirrord.  The functions in which cp and kjournald were blocked varied, but
+kmirrord's wchan always pointed to 'mempool_alloc()'.  We've seen this pattern
+on 2.6.15 and 2.6.17 kernels.  http://lkml.org/lkml/2005/4/20/142 indicates
+that this problem has been around even before.
+
+So much for the facts, here's my interpretation: mempool_alloc() first tries
+to atomically allocate the requested memory, or falls back to hand out
+preallocated chunks from the mempool.  If both fail, it puts the calling
+process (kmirrord in this case) on a private waitqueue until somebody refills
+the pool.  But the only 'somebody' is kmirrord itself, so we have a
+deadlock.
+
+I worked around this problem by falling back to a (blocking) kmalloc where
+kmirrord would previously have ended up on the waitqueue.  This defeats part of
+the benefits of using the mempool, but at least keeps the system running.  And
+it could be done with a two-line change.  Note that mempool_alloc() clears the
+GFP_NOIO flag internally, and only uses it to decide whether to wait or return
+an error if immediate allocation fails, so the attached patch doesn't change
+behaviour in the non-deadlocking case.  Patch is against current git
+(2.6.18-rc4), but should apply to earlier versions as well.  I've tested on
+2.6.15, where this patch makes the difference between random lockup and a
+stable system.
+
+Signed-off-by: Daniel Kobras <kobras@linux.de>
+Acked-by: Alasdair G Kergon <agk@redhat.com>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/md/dm-raid1.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- linux-2.6.17.11.orig/drivers/md/dm-raid1.c
++++ linux-2.6.17.11/drivers/md/dm-raid1.c
+@@ -253,7 +253,9 @@ static struct region *__rh_alloc(struct 
+       struct region *reg, *nreg;
+       read_unlock(&rh->hash_lock);
+-      nreg = mempool_alloc(rh->region_pool, GFP_NOIO);
++      nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
++      if (unlikely(!nreg))
++              nreg = kmalloc(sizeof(struct region), GFP_NOIO);
+       nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
+               RH_CLEAN : RH_NOSYNC;
+       nreg->rh = rh;
diff --git a/queue-2.6.17/remove-redundant-up-in-stop_machine.patch b/queue-2.6.17/remove-redundant-up-in-stop_machine.patch
new file mode 100644 (file)
index 0000000..a2624b9
--- /dev/null
@@ -0,0 +1,31 @@
+From stable-bounces@linux.kernel.org Sun Aug 27 01:24:26 2006
+Message-Id: <200608270823.k7R8Nlu3020847@shell0.pdx.osdl.net>
+To: torvalds@osdl.org
+From: akpm@osdl.org
+Date: Sun, 27 Aug 2006 01:23:46 -0700
+Cc: akpm@osdl.org, stable@kernel.org, yingchao.zhou@gmail.com
+Subject: Remove redundant up() in stop_machine()
+
+From: "Yingchao Zhou" <yingchao.zhou@gmail.com>
+
+An up() is called in kernel/stop_machine.c on failure, and also in the
+caller (unconditionally).
+
+Signed-off-by: Zhou Yingchao <yingchao.zhou@gmail.com>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/stop_machine.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- linux-2.6.17.11.orig/kernel/stop_machine.c
++++ linux-2.6.17.11/kernel/stop_machine.c
+@@ -111,7 +111,6 @@ static int stop_machine(void)
+       /* If some failed, kill them all. */
+       if (ret < 0) {
+               stopmachine_set_state(STOPMACHINE_EXIT);
+-              up(&stopmachine_mutex);
+               return ret;
+       }
index 954e0985bd7d8b309c3078591b06e7c99fffe7c9..89a7e884cdcfbbd00f95bfc8ab827828d295b968 100644 (file)
@@ -27,3 +27,5 @@ pktgen-make-sure-skb-nh-h-are-initialized-in-fill_packet_ipv6-too.patch
 pktgen-fix-oops-when-used-with-balance-tlb-bonding.patch
 missing-pci-id-update-for-via-ide.patch
 dvb-core-proper-handling-ule-sndu-length-of-0.patch
+remove-redundant-up-in-stop_machine.patch
+dm-fix-deadlock-under-high-i-o-load-in-raid1-setup.patch