]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 3 Oct 2014 19:27:48 +0000 (12:27 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 3 Oct 2014 19:27:48 +0000 (12:27 -0700)
added patches:
sched-fix-unreleased-llc_shared_mask-bit-during-cpu-hotplug.patch

queue-3.10/sched-fix-unreleased-llc_shared_mask-bit-during-cpu-hotplug.patch [new file with mode: 0644]
queue-3.10/series

diff --git a/queue-3.10/sched-fix-unreleased-llc_shared_mask-bit-during-cpu-hotplug.patch b/queue-3.10/sched-fix-unreleased-llc_shared_mask-bit-during-cpu-hotplug.patch
new file mode 100644 (file)
index 0000000..df0c82b
--- /dev/null
@@ -0,0 +1,120 @@
+From 03bd4e1f7265548832a76e7919a81f3137c44fd1 Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpeng.li@linux.intel.com>
+Date: Wed, 24 Sep 2014 16:38:05 +0800
+Subject: sched: Fix unreleased llc_shared_mask bit during CPU hotplug
+
+From: Wanpeng Li <wanpeng.li@linux.intel.com>
+
+commit 03bd4e1f7265548832a76e7919a81f3137c44fd1 upstream.
+
+The following bug can be triggered by hot adding and removing a large number of
+xen domain0's vcpus repeatedly:
+
+       BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 IP: [..] find_busiest_group
+       PGD 5a9d5067 PUD 13067 PMD 0
+       Oops: 0000 [#3] SMP
+       [...]
+       Call Trace:
+       load_balance
+       ? _raw_spin_unlock_irqrestore
+       idle_balance
+       __schedule
+       schedule
+       schedule_timeout
+       ? lock_timer_base
+       schedule_timeout_uninterruptible
+       msleep
+       lock_device_hotplug_sysfs
+       online_store
+       dev_attr_store
+       sysfs_write_file
+       vfs_write
+       SyS_write
+       system_call_fastpath
+
+Last level cache shared mask is built during CPU up and the
+build_sched_domain() routine takes advantage of it to setup
+the sched domain CPU topology.
+
+However, llc_shared_mask is not released during CPU disable,
+which leads to an invalid sched domain CPU topology.
+
+This patch fixes it by releasing the llc_shared_mask correctly
+during CPU disable.
+
+Yasuaki also reported that this can happen on real hardware:
+
+  https://lkml.org/lkml/2014/7/22/1018
+
+His case is here:
+
+       ==
+       Here is an example on my system.
+       My system has 4 sockets and each socket has 15 cores and HT is
+       enabled. In this case, each core of sockets is numbered as
+       follows:
+
+                | CPU#
+       Socket#0 | 0-14 , 60-74
+       Socket#1 | 15-29, 75-89
+       Socket#2 | 30-44, 90-104
+       Socket#3 | 45-59, 105-119
+
+       Then llc_shared_mask of CPU#30 has 0x3fff80000001fffc0000000.
+
+       It means that last level cache of Socket#2 is shared with
+       CPU#30-44 and 90-104.
+
+       When hot-removing socket#2 and #3, each core of sockets is
+       numbered as follows:
+
+                | CPU#
+       Socket#0 | 0-14 , 60-74
+       Socket#1 | 15-29, 75-89
+
+       But llc_shared_mask is not cleared. So llc_shared_mask of CPU#30
+       still holds 0x3fff80000001fffc0000000.
+
+       After that, when hot-adding socket#2 and #3, each core of
+       sockets is numbered as follows:
+
+                | CPU#
+       Socket#0 | 0-14 , 60-74
+       Socket#1 | 15-29, 75-89
+       Socket#2 | 30-59
+       Socket#3 | 90-119
+
+       Then llc_shared_mask of CPU#30 becomes
+       0x3fff8000fffffffc0000000. It means that last level cache of
+       Socket#2 is shared with CPU#30-59 and 90-104. So the mask has
+       the wrong value.
+
+Signed-off-by: Wanpeng Li <wanpeng.li@linux.intel.com>
+Tested-by: Linn Crosetto <linn@hp.com>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Toshi Kani <toshi.kani@hp.com>
+Reviewed-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Prarit Bhargava <prarit@redhat.com>
+Cc: Steven Rostedt <srostedt@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/1411547885-48165-1-git-send-email-wanpeng.li@linux.intel.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/smpboot.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -1284,6 +1284,9 @@ static void remove_siblinginfo(int cpu)
+       for_each_cpu(sibling, cpu_sibling_mask(cpu))
+               cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling));
++      for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
++              cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
++      cpumask_clear(cpu_llc_shared_mask(cpu));
+       cpumask_clear(cpu_sibling_mask(cpu));
+       cpumask_clear(cpu_core_mask(cpu));
+       c->phys_proc_id = 0;
index b31da6f89d596fbc445144e9b86777f711a50bbd..38effe9a0698b37b43e9b1d0595843a234652a19 100644 (file)
@@ -115,3 +115,4 @@ fsnotify-fdinfo-use-named-constants-instead-of-hardcoded-values.patch
 fs-notify-don-t-show-f_handle-if-exportfs_encode_inode_fh-failed.patch
 nilfs2-fix-data-loss-with-mmap.patch
 ocfs2-dlm-do-not-get-resource-spinlock-if-lockres-is-new.patch
+sched-fix-unreleased-llc_shared_mask-bit-during-cpu-hotplug.patch