From 7971e0cbeeaa50e8117a70217098f53be33f8079 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 13 Jul 2014 16:50:59 -0700 Subject: [PATCH] 3.10-stable patches added patches: cpuset-mempolicy-fix-sleeping-function-called-from-invalid-context.patch workqueue-fix-dev_set_uevent_suppress-imbalance.patch workqueue-zero-cpumask-of-wq_numa_possible_cpumask-on-init.patch --- ...function-called-from-invalid-context.patch | 100 ++++++++++++++++++ queue-3.10/series | 3 + ...ix-dev_set_uevent_suppress-imbalance.patch | 31 ++++++ ...-of-wq_numa_possible_cpumask-on-init.patch | 86 +++++++++++++++ 4 files changed, 220 insertions(+) create mode 100644 queue-3.10/cpuset-mempolicy-fix-sleeping-function-called-from-invalid-context.patch create mode 100644 queue-3.10/workqueue-fix-dev_set_uevent_suppress-imbalance.patch create mode 100644 queue-3.10/workqueue-zero-cpumask-of-wq_numa_possible_cpumask-on-init.patch diff --git a/queue-3.10/cpuset-mempolicy-fix-sleeping-function-called-from-invalid-context.patch b/queue-3.10/cpuset-mempolicy-fix-sleeping-function-called-from-invalid-context.patch new file mode 100644 index 00000000000..9f2d0865a54 --- /dev/null +++ b/queue-3.10/cpuset-mempolicy-fix-sleeping-function-called-from-invalid-context.patch @@ -0,0 +1,100 @@ +From 391acf970d21219a2a5446282d3b20eace0c0d7a Mon Sep 17 00:00:00 2001 +From: Gu Zheng +Date: Wed, 25 Jun 2014 09:57:18 +0800 +Subject: cpuset,mempolicy: fix sleeping function called from invalid context + +From: Gu Zheng + +commit 391acf970d21219a2a5446282d3b20eace0c0d7a upstream. + +When runing with the kernel(3.15-rc7+), the follow bug occurs: +[ 9969.258987] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:586 +[ 9969.359906] in_atomic(): 1, irqs_disabled(): 0, pid: 160655, name: python +[ 9969.441175] INFO: lockdep is turned off. 
+[ 9969.488184] CPU: 26 PID: 160655 Comm: python Tainted: G A 3.15.0-rc7+ #85 +[ 9969.581032] Hardware name: FUJITSU-SV PRIMEQUEST 1800E/SB, BIOS PRIMEQUEST 1000 Series BIOS Version 1.39 11/16/2012 +[ 9969.706052] ffffffff81a20e60 ffff8803e941fbd0 ffffffff8162f523 ffff8803e941fd18 +[ 9969.795323] ffff8803e941fbe0 ffffffff8109995a ffff8803e941fc58 ffffffff81633e6c +[ 9969.884710] ffffffff811ba5dc ffff880405c6b480 ffff88041fdd90a0 0000000000002000 +[ 9969.974071] Call Trace: +[ 9970.003403] [] dump_stack+0x4d/0x66 +[ 9970.065074] [] __might_sleep+0xfa/0x130 +[ 9970.130743] [] mutex_lock_nested+0x3c/0x4f0 +[ 9970.200638] [] ? kmem_cache_alloc+0x1bc/0x210 +[ 9970.272610] [] cpuset_mems_allowed+0x27/0x140 +[ 9970.344584] [] ? __mpol_dup+0x63/0x150 +[ 9970.409282] [] __mpol_dup+0xe5/0x150 +[ 9970.471897] [] ? __mpol_dup+0x63/0x150 +[ 9970.536585] [] ? copy_process.part.23+0x606/0x1d40 +[ 9970.613763] [] ? trace_hardirqs_on+0xd/0x10 +[ 9970.683660] [] ? monotonic_to_bootbased+0x2f/0x50 +[ 9970.759795] [] copy_process.part.23+0x670/0x1d40 +[ 9970.834885] [] do_fork+0xd8/0x380 +[ 9970.894375] [] ? __audit_syscall_entry+0x9c/0xf0 +[ 9970.969470] [] SyS_clone+0x16/0x20 +[ 9971.030011] [] stub_clone+0x69/0x90 +[ 9971.091573] [] ? system_call_fastpath+0x16/0x1b + +The cause is that cpuset_mems_allowed() tries to take +mutex_lock(&callback_mutex) under the rcu_read_lock (which was held in +__mpol_dup()). And in cpuset_mems_allowed(), the access to cpuset is +under rcu_read_lock, so in __mpol_dup(), we can reduce the rcu_read_lock +protection region to protect the access to cpuset only in +current_cpuset_is_being_rebound(), and thereby avoid this bug. 
+ +This patch is a temporary solution that just addresses the bug +mentioned above, can not fix the long-standing issue about cpuset.mems +rebinding on fork(): + +"When the forker's task_struct is duplicated (which includes + ->mems_allowed) and it races with an update to cpuset_being_rebound + in update_tasks_nodemask() then the task's mems_allowed doesn't get + updated. And the child task's mems_allowed can be wrong if the + cpuset's nodemask changes before the child has been added to the + cgroup's tasklist." + +Signed-off-by: Gu Zheng +Acked-by: Li Zefan +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/cpuset.c | 8 +++++++- + mm/mempolicy.c | 2 -- + 2 files changed, 7 insertions(+), 3 deletions(-) + +--- a/kernel/cpuset.c ++++ b/kernel/cpuset.c +@@ -1153,7 +1153,13 @@ done: + + int current_cpuset_is_being_rebound(void) + { +- return task_cs(current) == cpuset_being_rebound; ++ int ret; ++ ++ rcu_read_lock(); ++ ret = task_cs(current) == cpuset_being_rebound; ++ rcu_read_unlock(); ++ ++ return ret; + } + + static int update_relax_domain_level(struct cpuset *cs, s64 val) +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -2086,7 +2086,6 @@ struct mempolicy *__mpol_dup(struct memp + } else + *new = *old; + +- rcu_read_lock(); + if (current_cpuset_is_being_rebound()) { + nodemask_t mems = cpuset_mems_allowed(current); + if (new->flags & MPOL_F_REBINDING) +@@ -2094,7 +2093,6 @@ struct mempolicy *__mpol_dup(struct memp + else + mpol_rebind_policy(new, &mems, MPOL_REBIND_ONCE); + } +- rcu_read_unlock(); + atomic_set(&new->refcnt, 1); + return new; + } diff --git a/queue-3.10/series b/queue-3.10/series index 2ead9dda3e6..81d2b007ac4 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -3,3 +3,6 @@ usb-cp210x-add-support-for-corsair-usb-dongle.patch usb-ftdi_sio-add-extra-pid.patch usb-serial-ftdi_sio-add-infineon-triboard.patch parisc-add-serial-ports-of-c8000-1ghz-machine-to-hardware-database.patch 
+workqueue-fix-dev_set_uevent_suppress-imbalance.patch +cpuset-mempolicy-fix-sleeping-function-called-from-invalid-context.patch +workqueue-zero-cpumask-of-wq_numa_possible_cpumask-on-init.patch diff --git a/queue-3.10/workqueue-fix-dev_set_uevent_suppress-imbalance.patch b/queue-3.10/workqueue-fix-dev_set_uevent_suppress-imbalance.patch new file mode 100644 index 00000000000..b2e233b4b47 --- /dev/null +++ b/queue-3.10/workqueue-fix-dev_set_uevent_suppress-imbalance.patch @@ -0,0 +1,31 @@ +From bddbceb688c6d0decaabc7884fede319d02f96c8 Mon Sep 17 00:00:00 2001 +From: Maxime Bizon +Date: Mon, 23 Jun 2014 16:35:35 +0200 +Subject: workqueue: fix dev_set_uevent_suppress() imbalance + +From: Maxime Bizon + +commit bddbceb688c6d0decaabc7884fede319d02f96c8 upstream. + +Uevents are suppressed during attributes registration, but never +restored, so kobject_uevent() does nothing. + +Signed-off-by: Maxime Bizon +Signed-off-by: Tejun Heo +Fixes: 226223ab3c4118ddd10688cc2c131135848371ab +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/workqueue.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -3373,6 +3373,7 @@ int workqueue_sysfs_register(struct work + } + } + ++ dev_set_uevent_suppress(&wq_dev->dev, false); + kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); + return 0; + } diff --git a/queue-3.10/workqueue-zero-cpumask-of-wq_numa_possible_cpumask-on-init.patch b/queue-3.10/workqueue-zero-cpumask-of-wq_numa_possible_cpumask-on-init.patch new file mode 100644 index 00000000000..20055f95196 --- /dev/null +++ b/queue-3.10/workqueue-zero-cpumask-of-wq_numa_possible_cpumask-on-init.patch @@ -0,0 +1,86 @@ +From 5a6024f1604eef119cf3a6fa413fe0261a81a8f3 Mon Sep 17 00:00:00 2001 +From: Yasuaki Ishimatsu +Date: Mon, 7 Jul 2014 09:56:48 -0400 +Subject: workqueue: zero cpumask of wq_numa_possible_cpumask on init + +From: Yasuaki Ishimatsu + +commit 5a6024f1604eef119cf3a6fa413fe0261a81a8f3 upstream. 
+ +When hot-adding and onlining a CPU, a kernel panic occurs, showing the following +call trace. + + BUG: unable to handle kernel paging request at 0000000000001d08 + IP: [] __alloc_pages_nodemask+0x9d/0xb10 + PGD 0 + Oops: 0000 [#1] SMP + ... + Call Trace: + [] ? cpumask_next_and+0x35/0x50 + [] ? find_busiest_group+0x113/0x8f0 + [] ? deactivate_slab+0x349/0x3c0 + [] new_slab+0x91/0x300 + [] __slab_alloc+0x2bb/0x482 + [] ? copy_process.part.25+0xfc/0x14c0 + [] ? load_balance+0x218/0x890 + [] ? sched_clock+0x9/0x10 + [] ? trace_clock_local+0x9/0x10 + [] kmem_cache_alloc_node+0x8c/0x200 + [] copy_process.part.25+0xfc/0x14c0 + [] ? trace_buffer_unlock_commit+0x4d/0x60 + [] ? kthread_create_on_node+0x140/0x140 + [] do_fork+0xbc/0x360 + [] kernel_thread+0x26/0x30 + [] kthreadd+0x2c2/0x300 + [] ? kthread_create_on_cpu+0x60/0x60 + [] ret_from_fork+0x7c/0xb0 + [] ? kthread_create_on_cpu+0x60/0x60 + +In my investigation, I found the root cause is wq_numa_possible_cpumask. +All entries of wq_numa_possible_cpumask are allocated by +alloc_cpumask_var_node(), and these entries are used without being initialized. +So these entries hold wrong values. + +When hot-adding and onlining a CPU, wq_update_unbound_numa() is called. +wq_update_unbound_numa() calls alloc_unbound_pwq(). And alloc_unbound_pwq() +calls get_unbound_pool(). In get_unbound_pool(), worker_pool->node is set +as follows: + +3592 /* if cpumask is contained inside a NUMA node, we belong to that node */ +3593 if (wq_numa_enabled) { +3594 for_each_node(node) { +3595 if (cpumask_subset(pool->attrs->cpumask, +3596 wq_numa_possible_cpumask[node])) { +3597 pool->node = node; +3598 break; +3599 } +3600 } +3601 } + +But wq_numa_possible_cpumask[node] does not hold a correct cpumask, so the wrong +node is selected. As a result, the kernel panics. + +With this patch, all entries of wq_numa_possible_cpumask are allocated by +zalloc_cpumask_var_node() so that they are zero-initialized, and the panic disappears. 
+ +Signed-off-by: Yasuaki Ishimatsu +Reviewed-by: Lai Jiangshan +Signed-off-by: Tejun Heo +Fixes: bce903809ab3 ("workqueue: add wq_numa_tbl_len and wq_numa_possible_cpumask[]") +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/workqueue.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -4968,7 +4968,7 @@ static void __init wq_numa_init(void) + BUG_ON(!tbl); + + for_each_node(node) +- BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL, ++ BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL, + node_online(node) ? node : NUMA_NO_NODE)); + + for_each_possible_cpu(cpu) { -- 2.47.3