From 7971e0cbeeaa50e8117a70217098f53be33f8079 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 13 Jul 2014 16:50:59 -0700
Subject: [PATCH] 3.10-stable patches

added patches:
	cpuset-mempolicy-fix-sleeping-function-called-from-invalid-context.patch
	workqueue-fix-dev_set_uevent_suppress-imbalance.patch
	workqueue-zero-cpumask-of-wq_numa_possible_cpumask-on-init.patch
---
 ...function-called-from-invalid-context.patch | 100 ++++++++++++++++++
 queue-3.10/series                             |   3 +
 ...ix-dev_set_uevent_suppress-imbalance.patch |  31 ++++++
 ...-of-wq_numa_possible_cpumask-on-init.patch |  86 +++++++++++++++
 4 files changed, 220 insertions(+)
 create mode 100644 queue-3.10/cpuset-mempolicy-fix-sleeping-function-called-from-invalid-context.patch
 create mode 100644 queue-3.10/workqueue-fix-dev_set_uevent_suppress-imbalance.patch
 create mode 100644 queue-3.10/workqueue-zero-cpumask-of-wq_numa_possible_cpumask-on-init.patch
diff --git a/queue-3.10/cpuset-mempolicy-fix-sleeping-function-called-from-invalid-context.patch b/queue-3.10/cpuset-mempolicy-fix-sleeping-function-called-from-invalid-context.patch
new file mode 100644
index 00000000000..9f2d0865a54
--- /dev/null
+++ b/queue-3.10/cpuset-mempolicy-fix-sleeping-function-called-from-invalid-context.patch
@@ -0,0 +1,100 @@
+From 391acf970d21219a2a5446282d3b20eace0c0d7a Mon Sep 17 00:00:00 2001
+From: Gu Zheng <guz.fnst@cn.fujitsu.com>
+Date: Wed, 25 Jun 2014 09:57:18 +0800
+Subject: cpuset,mempolicy: fix sleeping function called from invalid context
+
+From: Gu Zheng <guz.fnst@cn.fujitsu.com>
+
+commit 391acf970d21219a2a5446282d3b20eace0c0d7a upstream.
+
+When running with the kernel (3.15-rc7+), the following bug occurs:
+[ 9969.258987] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:586
+[ 9969.359906] in_atomic(): 1, irqs_disabled(): 0, pid: 160655, name: python
+[ 9969.441175] INFO: lockdep is turned off.
+[ 9969.488184] CPU: 26 PID: 160655 Comm: python Tainted: G       A      3.15.0-rc7+ #85
+[ 9969.581032] Hardware name: FUJITSU-SV PRIMEQUEST 1800E/SB, BIOS PRIMEQUEST 1000 Series BIOS Version 1.39 11/16/2012
+[ 9969.706052]  ffffffff81a20e60 ffff8803e941fbd0 ffffffff8162f523 ffff8803e941fd18
+[ 9969.795323]  ffff8803e941fbe0 ffffffff8109995a ffff8803e941fc58 ffffffff81633e6c
+[ 9969.884710]  ffffffff811ba5dc ffff880405c6b480 ffff88041fdd90a0 0000000000002000
+[ 9969.974071] Call Trace:
+[ 9970.003403]  [<ffffffff8162f523>] dump_stack+0x4d/0x66
+[ 9970.065074]  [<ffffffff8109995a>] __might_sleep+0xfa/0x130
+[ 9970.130743]  [<ffffffff81633e6c>] mutex_lock_nested+0x3c/0x4f0
+[ 9970.200638]  [<ffffffff811ba5dc>] ? kmem_cache_alloc+0x1bc/0x210
+[ 9970.272610]  [<ffffffff81105807>] cpuset_mems_allowed+0x27/0x140
+[ 9970.344584]  [<ffffffff811b1303>] ? __mpol_dup+0x63/0x150
+[ 9970.409282]  [<ffffffff811b1385>] __mpol_dup+0xe5/0x150
+[ 9970.471897]  [<ffffffff811b1303>] ? __mpol_dup+0x63/0x150
+[ 9970.536585]  [<ffffffff81068c86>] ? copy_process.part.23+0x606/0x1d40
+[ 9970.613763]  [<ffffffff810bf28d>] ? trace_hardirqs_on+0xd/0x10
+[ 9970.683660]  [<ffffffff810ddddf>] ? monotonic_to_bootbased+0x2f/0x50
+[ 9970.759795]  [<ffffffff81068cf0>] copy_process.part.23+0x670/0x1d40
+[ 9970.834885]  [<ffffffff8106a598>] do_fork+0xd8/0x380
+[ 9970.894375]  [<ffffffff81110e4c>] ? __audit_syscall_entry+0x9c/0xf0
+[ 9970.969470]  [<ffffffff8106a8c6>] SyS_clone+0x16/0x20
+[ 9971.030011]  [<ffffffff81642009>] stub_clone+0x69/0x90
+[ 9971.091573]  [<ffffffff81641c29>] ? system_call_fastpath+0x16/0x1b
+
+The cause is that cpuset_mems_allowed() tries to take
+mutex_lock(&callback_mutex) under the rcu_read_lock (which was held in
+__mpol_dup()). In cpuset_mems_allowed(), the access to cpuset is
+under rcu_read_lock, so in __mpol_dup() we can reduce the rcu_read_lock
+protection region to protect the access to cpuset only in
+current_cpuset_is_being_rebound(). This avoids the bug.
+
+This patch is a temporary solution that just addresses the bug
+mentioned above; it does not fix the long-standing issue with cpuset.mems
+rebinding on fork():
+
+"When the forker's task_struct is duplicated (which includes
+ ->mems_allowed) and it races with an update to cpuset_being_rebound
+ in update_tasks_nodemask() then the task's mems_allowed doesn't get
+ updated. And the child task's mems_allowed can be wrong if the
+ cpuset's nodemask changes before the child has been added to the
+ cgroup's tasklist."
+
+Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
+Acked-by: Li Zefan <lizefan@huawei.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/cpuset.c |    8 +++++++-
+ mm/mempolicy.c  |    2 --
+ 2 files changed, 7 insertions(+), 3 deletions(-)
+
+--- a/kernel/cpuset.c
++++ b/kernel/cpuset.c
+@@ -1153,7 +1153,13 @@ done:
+ 
+ int current_cpuset_is_being_rebound(void)
+ {
+-	return task_cs(current) == cpuset_being_rebound;
++	int ret;
++
++	rcu_read_lock();
++	ret = task_cs(current) == cpuset_being_rebound;
++	rcu_read_unlock();
++
++	return ret;
+ }
+ 
+ static int update_relax_domain_level(struct cpuset *cs, s64 val)
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -2086,7 +2086,6 @@ struct mempolicy *__mpol_dup(struct memp
+ 	} else
+ 		*new = *old;
+ 
+-	rcu_read_lock();
+ 	if (current_cpuset_is_being_rebound()) {
+ 		nodemask_t mems = cpuset_mems_allowed(current);
+ 		if (new->flags & MPOL_F_REBINDING)
+@@ -2094,7 +2093,6 @@ struct mempolicy *__mpol_dup(struct memp
+ 		else
+ 			mpol_rebind_policy(new, &mems, MPOL_REBIND_ONCE);
+ 	}
+-	rcu_read_unlock();
+ 	atomic_set(&new->refcnt, 1);
+ 	return new;
+ }
diff --git a/queue-3.10/series b/queue-3.10/series
index 2ead9dda3e6..81d2b007ac4 100644
--- a/queue-3.10/series
+++ b/queue-3.10/series
@@ -3,3 +3,6 @@ usb-cp210x-add-support-for-corsair-usb-dongle.patch
 usb-ftdi_sio-add-extra-pid.patch
 usb-serial-ftdi_sio-add-infineon-triboard.patch
 parisc-add-serial-ports-of-c8000-1ghz-machine-to-hardware-database.patch
+workqueue-fix-dev_set_uevent_suppress-imbalance.patch
+cpuset-mempolicy-fix-sleeping-function-called-from-invalid-context.patch
+workqueue-zero-cpumask-of-wq_numa_possible_cpumask-on-init.patch
diff --git a/queue-3.10/workqueue-fix-dev_set_uevent_suppress-imbalance.patch b/queue-3.10/workqueue-fix-dev_set_uevent_suppress-imbalance.patch
new file mode 100644
index 00000000000..b2e233b4b47
--- /dev/null
+++ b/queue-3.10/workqueue-fix-dev_set_uevent_suppress-imbalance.patch
@@ -0,0 +1,31 @@
+From bddbceb688c6d0decaabc7884fede319d02f96c8 Mon Sep 17 00:00:00 2001
+From: Maxime Bizon <mbizon@freebox.fr>
+Date: Mon, 23 Jun 2014 16:35:35 +0200
+Subject: workqueue: fix dev_set_uevent_suppress() imbalance
+
+From: Maxime Bizon <mbizon@freebox.fr>
+
+commit bddbceb688c6d0decaabc7884fede319d02f96c8 upstream.
+
+Uevents are suppressed during attributes registration, but never
+restored, so kobject_uevent() does nothing.
+
+Signed-off-by: Maxime Bizon <mbizon@freebox.fr>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Fixes: 226223ab3c4118ddd10688cc2c131135848371ab
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/workqueue.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -3373,6 +3373,7 @@ int workqueue_sysfs_register(struct work
+ 		}
+ 	}
+ 
++	dev_set_uevent_suppress(&wq_dev->dev, false);
+ 	kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
+ 	return 0;
+ }
diff --git a/queue-3.10/workqueue-zero-cpumask-of-wq_numa_possible_cpumask-on-init.patch b/queue-3.10/workqueue-zero-cpumask-of-wq_numa_possible_cpumask-on-init.patch
new file mode 100644
index 00000000000..20055f95196
--- /dev/null
+++ b/queue-3.10/workqueue-zero-cpumask-of-wq_numa_possible_cpumask-on-init.patch
@@ -0,0 +1,86 @@
+From 5a6024f1604eef119cf3a6fa413fe0261a81a8f3 Mon Sep 17 00:00:00 2001
+From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
+Date: Mon, 7 Jul 2014 09:56:48 -0400
+Subject: workqueue: zero cpumask of wq_numa_possible_cpumask on init
+
+From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
+
+commit 5a6024f1604eef119cf3a6fa413fe0261a81a8f3 upstream.
+
+When hot-adding and onlining a CPU, a kernel panic occurs, showing the
+following call trace.
+
+  BUG: unable to handle kernel paging request at 0000000000001d08
+  IP: [<ffffffff8114acfd>] __alloc_pages_nodemask+0x9d/0xb10
+  PGD 0
+  Oops: 0000 [#1] SMP
+  ...
+  Call Trace:
+   [<ffffffff812b8745>] ? cpumask_next_and+0x35/0x50
+   [<ffffffff810a3283>] ? find_busiest_group+0x113/0x8f0
+   [<ffffffff81193bc9>] ? deactivate_slab+0x349/0x3c0
+   [<ffffffff811926f1>] new_slab+0x91/0x300
+   [<ffffffff815de95a>] __slab_alloc+0x2bb/0x482
+   [<ffffffff8105bc1c>] ? copy_process.part.25+0xfc/0x14c0
+   [<ffffffff810a3c78>] ? load_balance+0x218/0x890
+   [<ffffffff8101a679>] ? sched_clock+0x9/0x10
+   [<ffffffff81105ba9>] ? trace_clock_local+0x9/0x10
+   [<ffffffff81193d1c>] kmem_cache_alloc_node+0x8c/0x200
+   [<ffffffff8105bc1c>] copy_process.part.25+0xfc/0x14c0
+   [<ffffffff81114d0d>] ? trace_buffer_unlock_commit+0x4d/0x60
+   [<ffffffff81085a80>] ? kthread_create_on_node+0x140/0x140
+   [<ffffffff8105d0ec>] do_fork+0xbc/0x360
+   [<ffffffff8105d3b6>] kernel_thread+0x26/0x30
+   [<ffffffff81086652>] kthreadd+0x2c2/0x300
+   [<ffffffff81086390>] ? kthread_create_on_cpu+0x60/0x60
+   [<ffffffff815f20ec>] ret_from_fork+0x7c/0xb0
+   [<ffffffff81086390>] ? kthread_create_on_cpu+0x60/0x60
+
+In my investigation, I found the root cause is wq_numa_possible_cpumask.
+All entries of wq_numa_possible_cpumask are allocated by
+alloc_cpumask_var_node(), and these entries are used without being
+initialized. So these entries have wrong values.
+
+When hot-adding and onlining a CPU, wq_update_unbound_numa() is called.
+wq_update_unbound_numa() calls alloc_unbound_pwq(). And alloc_unbound_pwq()
+calls get_unbound_pool(). In get_unbound_pool(), worker_pool->node is set
+as follows:
+
+3592         /* if cpumask is contained inside a NUMA node, we belong to that node */
+3593         if (wq_numa_enabled) {
+3594                 for_each_node(node) {
+3595                         if (cpumask_subset(pool->attrs->cpumask,
+3596                                            wq_numa_possible_cpumask[node])) {
+3597                                 pool->node = node;
+3598                                 break;
+3599                         }
+3600                 }
+3601         }
+
+But wq_numa_possible_cpumask[node] does not have the correct cpumask. So
+the wrong node is selected. As a result, a kernel panic occurs.
+
+With this patch, all entries of wq_numa_possible_cpumask are allocated by
+zalloc_cpumask_var_node() to initialize them, and the panic disappears.
+
+Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
+Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Fixes: bce903809ab3 ("workqueue: add wq_numa_tbl_len and wq_numa_possible_cpumask[]")
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/workqueue.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -4968,7 +4968,7 @@ static void __init wq_numa_init(void)
+ 	BUG_ON(!tbl);
+ 
+ 	for_each_node(node)
+-		BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
++		BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
+ 				node_online(node) ? node : NUMA_NO_NODE));
+ 
+ 	for_each_possible_cpu(cpu) {
-- 
2.47.3