git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.8-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 3 Dec 2016 08:53:28 +0000 (09:53 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 3 Dec 2016 08:53:28 +0000 (09:53 +0100)
added patches:
rcu-fix-soft-lockup-for-rcu_nocb_kthread.patch

queue-4.8/rcu-fix-soft-lockup-for-rcu_nocb_kthread.patch [new file with mode: 0644]
queue-4.8/series

diff --git a/queue-4.8/rcu-fix-soft-lockup-for-rcu_nocb_kthread.patch b/queue-4.8/rcu-fix-soft-lockup-for-rcu_nocb_kthread.patch
new file mode 100644 (file)
index 0000000..5e55f2f
--- /dev/null
@@ -0,0 +1,88 @@
+From bedc1969150d480c462cdac320fa944b694a7162 Mon Sep 17 00:00:00 2001
+From: Ding Tianhong <dingtianhong@huawei.com>
+Date: Wed, 15 Jun 2016 15:27:36 +0800
+Subject: rcu: Fix soft lockup for rcu_nocb_kthread
+
+From: Ding Tianhong <dingtianhong@huawei.com>
+
+commit bedc1969150d480c462cdac320fa944b694a7162 upstream.
+
+Carrying out the following steps results in a softlockup in the
+RCU callback-offload (rcuo) kthreads:
+
+1. Connect to ixgbevf, and set the speed to 10Gb/s.
+2. Use ifconfig to bring the nic up and down repeatedly.
+
+[  317.005148] IPv6: ADDRCONF(NETDEV_CHANGE): eth2: link becomes ready
+[  368.106005] BUG: soft lockup - CPU#1 stuck for 22s! [rcuos/1:15]
+[  368.106005] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
+[  368.106005] task: ffff88057dd8a220 ti: ffff88057dd9c000 task.ti: ffff88057dd9c000
+[  368.106005] RIP: 0010:[<ffffffff81579e04>]  [<ffffffff81579e04>] fib_table_lookup+0x14/0x390
+[  368.106005] RSP: 0018:ffff88061fc83ce8  EFLAGS: 00000286
+[  368.106005] RAX: 0000000000000001 RBX: 00000000020155c0 RCX: 0000000000000001
+[  368.106005] RDX: ffff88061fc83d50 RSI: ffff88061fc83d70 RDI: ffff880036d11a00
+[  368.106005] RBP: ffff88061fc83d08 R08: 0000000000000001 R09: 0000000000000000
+[  368.106005] R10: ffff880036d11a00 R11: ffffffff819e0900 R12: ffff88061fc83c58
+[  368.106005] R13: ffffffff816154dd R14: ffff88061fc83d08 R15: 00000000020155c0
+[  368.106005] FS:  0000000000000000(0000) GS:ffff88061fc80000(0000) knlGS:0000000000000000
+[  368.106005] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  368.106005] CR2: 00007f8c2aee9c40 CR3: 000000057b222000 CR4: 00000000000407e0
+[  368.106005] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[  368.106005] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
+[  368.106005] Stack:
+[  368.106005]  00000000010000c0 ffff88057b766000 ffff8802e380b000 ffff88057af03e00
+[  368.106005]  ffff88061fc83dc0 ffffffff815349a6 ffff88061fc83d40 ffffffff814ee146
+[  368.106005]  ffff8802e380af00 00000000e380af00 ffffffff819e0900 020155c0010000c0
+[  368.106005] Call Trace:
+[  368.106005]  <IRQ>
+[  368.106005]
+[  368.106005]  [<ffffffff815349a6>] ip_route_input_noref+0x516/0xbd0
+[  368.106005]  [<ffffffff814ee146>] ? skb_release_data+0xd6/0x110
+[  368.106005]  [<ffffffff814ee20a>] ? kfree_skb+0x3a/0xa0
+[  368.106005]  [<ffffffff8153698f>] ip_rcv_finish+0x29f/0x350
+[  368.106005]  [<ffffffff81537034>] ip_rcv+0x234/0x380
+[  368.106005]  [<ffffffff814fd656>] __netif_receive_skb_core+0x676/0x870
+[  368.106005]  [<ffffffff814fd868>] __netif_receive_skb+0x18/0x60
+[  368.106005]  [<ffffffff814fe4de>] process_backlog+0xae/0x180
+[  368.106005]  [<ffffffff814fdcb2>] net_rx_action+0x152/0x240
+[  368.106005]  [<ffffffff81077b3f>] __do_softirq+0xef/0x280
+[  368.106005]  [<ffffffff8161619c>] call_softirq+0x1c/0x30
+[  368.106005]  <EOI>
+[  368.106005]
+[  368.106005]  [<ffffffff81015d95>] do_softirq+0x65/0xa0
+[  368.106005]  [<ffffffff81077174>] local_bh_enable+0x94/0xa0
+[  368.106005]  [<ffffffff81114922>] rcu_nocb_kthread+0x232/0x370
+[  368.106005]  [<ffffffff81098250>] ? wake_up_bit+0x30/0x30
+[  368.106005]  [<ffffffff811146f0>] ? rcu_start_gp+0x40/0x40
+[  368.106005]  [<ffffffff8109728f>] kthread+0xcf/0xe0
+[  368.106005]  [<ffffffff810971c0>] ? kthread_create_on_node+0x140/0x140
+[  368.106005]  [<ffffffff816147d8>] ret_from_fork+0x58/0x90
+[  368.106005]  [<ffffffff810971c0>] ? kthread_create_on_node+0x140/0x140
+
+==================================cut here==============================
+
+It turns out that the rcuos callback-offload kthread is busy processing
+a very large quantity of RCU callbacks, and it is not relinquishing the
+CPU while doing so.  This commit therefore adds a cond_resched_rcu_qs()
+within the loop to allow other tasks to run.
+
+Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
+[ paulmck: Substituted cond_resched_rcu_qs for cond_resched. ]
+Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Cc: Dhaval Giani <dhaval.giani@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/rcu/tree_plugin.h |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/rcu/tree_plugin.h
++++ b/kernel/rcu/tree_plugin.h
+@@ -2173,6 +2173,7 @@ static int rcu_nocb_kthread(void *arg)
+                               cl++;
+                       c++;
+                       local_bh_enable();
++                      cond_resched_rcu_qs();
+                       list = next;
+               }
+               trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
index 96ee2d07dedb81375e374a9ec6b1e744910b0878..c0dbb501dc666af37fab7c301b0ea94d93710088 100644 (file)
@@ -11,3 +11,4 @@ mm-fix-false-positive-warn_on-in-truncate-invalidate-for-hugetlb.patch
 ovl-fix-d_real-for-stacked-fs.patch
 input-change-key_data-from-0x275-to-0x277.patch
 input-psmouse-disable-automatic-probing-of-byd-touchpads.patch
+rcu-fix-soft-lockup-for-rcu_nocb_kthread.patch