git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 10 Jan 2020 08:00:22 +0000 (09:00 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 10 Jan 2020 08:00:22 +0000 (09:00 +0100)
added patches:
powerpc-spinlocks-include-correct-header-for-static-key.patch
powerpc-vcpu-assume-dedicated-processors-as-non-preempt.patch

queue-5.4/powerpc-spinlocks-include-correct-header-for-static-key.patch [new file with mode: 0644]
queue-5.4/powerpc-vcpu-assume-dedicated-processors-as-non-preempt.patch [new file with mode: 0644]
queue-5.4/series

diff --git a/queue-5.4/powerpc-spinlocks-include-correct-header-for-static-key.patch b/queue-5.4/powerpc-spinlocks-include-correct-header-for-static-key.patch
new file mode 100644 (file)
index 0000000..53a46f8
--- /dev/null
+++ b/queue-5.4/powerpc-spinlocks-include-correct-header-for-static-key.patch
@@ -0,0 +1,45 @@
+From 6da3eced8c5f3b03340b0c395bacd552c4d52411 Mon Sep 17 00:00:00 2001
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Mon, 23 Dec 2019 14:31:47 +0100
+Subject: powerpc/spinlocks: Include correct header for static key
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+commit 6da3eced8c5f3b03340b0c395bacd552c4d52411 upstream.
+
+Recently, the spinlock implementation grew a static key optimization,
+but the jump_label.h header include was left out, leading to build
+errors:
+
+  linux/arch/powerpc/include/asm/spinlock.h:44:7: error: implicit declaration of function ‘static_branch_unlikely’
+   44 |  if (!static_branch_unlikely(&shared_processor))
+
+This commit adds the missing header.
+
+mpe: The build break is only seen with CONFIG_JUMP_LABEL=n.
+
+Fixes: 656c21d6af5d ("powerpc/shared: Use static key to detect shared processor")
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Reviewed-by: Srikar Dronamraju <srikar@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20191223133147.129983-1-Jason@zx2c4.com
+Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/spinlock.h |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/powerpc/include/asm/spinlock.h
++++ b/arch/powerpc/include/asm/spinlock.h
+@@ -15,6 +15,7 @@
+  *
+  * (the type definitions are in asm/spinlock_types.h)
+  */
++#include <linux/jump_label.h>
+ #include <linux/irqflags.h>
+ #ifdef CONFIG_PPC64
+ #include <asm/paca.h>
diff --git a/queue-5.4/powerpc-vcpu-assume-dedicated-processors-as-non-preempt.patch b/queue-5.4/powerpc-vcpu-assume-dedicated-processors-as-non-preempt.patch
new file mode 100644 (file)
index 0000000..00099c8
--- /dev/null
+++ b/queue-5.4/powerpc-vcpu-assume-dedicated-processors-as-non-preempt.patch
@@ -0,0 +1,160 @@
+From 14c73bd344da60abaf7da3ea2e7733ddda35bbac Mon Sep 17 00:00:00 2001
+From: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+Date: Thu, 5 Dec 2019 14:02:17 +0530
+Subject: powerpc/vcpu: Assume dedicated processors as non-preempt
+
+From: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+
+commit 14c73bd344da60abaf7da3ea2e7733ddda35bbac upstream.
+
+With commit 247f2f6f3c70 ("sched/core: Don't schedule threads on
+pre-empted vCPUs"), the scheduler avoids preempted vCPUs to schedule
+tasks on wakeup. This leads to wrong choice of CPU, which in-turn
+leads to larger wakeup latencies. Eventually, it leads to performance
+regression in latency sensitive benchmarks like soltp, schbench etc.
+
+On Powerpc, vcpu_is_preempted() only looks at yield_count. If the
+yield_count is odd, the vCPU is assumed to be preempted. However
+yield_count is increased whenever the LPAR enters CEDE state (idle).
+So any CPU that has entered CEDE state is assumed to be preempted.
+
+Even if vCPU of dedicated LPAR is preempted/donated, it should have
+right of first-use since they are supposed to own the vCPU.
+
+On a Power9 System with 32 cores:
+  # lscpu
+  Architecture:        ppc64le
+  Byte Order:          Little Endian
+  CPU(s):              128
+  On-line CPU(s) list: 0-127
+  Thread(s) per core:  8
+  Core(s) per socket:  1
+  Socket(s):           16
+  NUMA node(s):        2
+  Model:               2.2 (pvr 004e 0202)
+  Model name:          POWER9 (architected), altivec supported
+  Hypervisor vendor:   pHyp
+  Virtualization type: para
+  L1d cache:           32K
+  L1i cache:           32K
+  L2 cache:            512K
+  L3 cache:            10240K
+  NUMA node0 CPU(s):   0-63
+  NUMA node1 CPU(s):   64-127
+
+  # perf stat -a -r 5 ./schbench
+  v5.4                               v5.4 + patch
+  Latency percentiles (usec)         Latency percentiles (usec)
+        50.0000th: 45                      50.0th: 45
+        75.0000th: 62                      75.0th: 63
+        90.0000th: 71                      90.0th: 74
+        95.0000th: 77                      95.0th: 78
+        *99.0000th: 91                     *99.0th: 82
+        99.5000th: 707                     99.5th: 83
+        99.9000th: 6920                    99.9th: 86
+        min=0, max=10048                   min=0, max=96
+  Latency percentiles (usec)         Latency percentiles (usec)
+        50.0000th: 45                      50.0th: 46
+        75.0000th: 61                      75.0th: 64
+        90.0000th: 72                      90.0th: 75
+        95.0000th: 79                      95.0th: 79
+        *99.0000th: 691                    *99.0th: 83
+        99.5000th: 3972                    99.5th: 85
+        99.9000th: 8368                    99.9th: 91
+        min=0, max=16606                   min=0, max=117
+  Latency percentiles (usec)         Latency percentiles (usec)
+        50.0000th: 45                      50.0th: 46
+        75.0000th: 61                      75.0th: 64
+        90.0000th: 71                      90.0th: 75
+        95.0000th: 77                      95.0th: 79
+        *99.0000th: 106                    *99.0th: 83
+        99.5000th: 2364                    99.5th: 84
+        99.9000th: 7480                    99.9th: 90
+        min=0, max=10001                   min=0, max=95
+  Latency percentiles (usec)         Latency percentiles (usec)
+        50.0000th: 45                      50.0th: 47
+        75.0000th: 62                      75.0th: 65
+        90.0000th: 72                      90.0th: 75
+        95.0000th: 78                      95.0th: 79
+        *99.0000th: 93                     *99.0th: 84
+        99.5000th: 108                     99.5th: 85
+        99.9000th: 6792                    99.9th: 90
+        min=0, max=17681                   min=0, max=117
+  Latency percentiles (usec)         Latency percentiles (usec)
+        50.0000th: 46                      50.0th: 45
+        75.0000th: 62                      75.0th: 64
+        90.0000th: 73                      90.0th: 75
+        95.0000th: 79                      95.0th: 79
+        *99.0000th: 113                    *99.0th: 82
+        99.5000th: 2724                    99.5th: 83
+        99.9000th: 6184                    99.9th: 93
+        min=0, max=9887                    min=0, max=111
+
+   Performance counter stats for 'system wide' (5 runs):
+
+  context-switches    43,373  ( +-  0.40% )   44,597 ( +-  0.55% )
+  cpu-migrations       1,211  ( +-  5.04% )      220 ( +-  6.23% )
+  page-faults         15,983  ( +-  5.21% )   15,360 ( +-  3.38% )
+
+Waiman Long suggested using static_keys.
+
+Fixes: 247f2f6f3c70 ("sched/core: Don't schedule threads on pre-empted vCPUs")
+Cc: stable@vger.kernel.org # v4.18+
+Reported-by: Parth Shah <parth@linux.ibm.com>
+Reported-by: Ihor Pasichnyk <Ihor.Pasichnyk@ibm.com>
+Tested-by: Juri Lelli <juri.lelli@redhat.com>
+Acked-by: Waiman Long <longman@redhat.com>
+Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
+Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
+Acked-by: Phil Auld <pauld@redhat.com>
+Reviewed-by: Vaidyanathan Srinivasan <svaidy@linux.ibm.com>
+Tested-by: Parth Shah <parth@linux.ibm.com>
+[mpe: Move the key and setting of the key to pseries/setup.c]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20191213035036.6913-1-mpe@ellerman.id.au
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/spinlock.h    |    4 +++-
+ arch/powerpc/platforms/pseries/setup.c |    7 +++++++
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/include/asm/spinlock.h
++++ b/arch/powerpc/include/asm/spinlock.h
+@@ -36,10 +36,12 @@
+ #endif
+ 
+ #ifdef CONFIG_PPC_PSERIES
++DECLARE_STATIC_KEY_FALSE(shared_processor);
++
+ #define vcpu_is_preempted vcpu_is_preempted
+ static inline bool vcpu_is_preempted(int cpu)
+ {
+-      if (!firmware_has_feature(FW_FEATURE_SPLPAR))
++      if (!static_branch_unlikely(&shared_processor))
+               return false;
+       return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1);
+ }
+--- a/arch/powerpc/platforms/pseries/setup.c
++++ b/arch/powerpc/platforms/pseries/setup.c
+@@ -74,6 +74,9 @@
+ #include "pseries.h"
+ #include "../../../../drivers/pci/pci.h"
+ 
++DEFINE_STATIC_KEY_FALSE(shared_processor);
++EXPORT_SYMBOL_GPL(shared_processor);
++
+ int CMO_PrPSP = -1;
+ int CMO_SecPSP = -1;
+ unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
+@@ -758,6 +761,10 @@ static void __init pSeries_setup_arch(vo
+ 
+       if (firmware_has_feature(FW_FEATURE_LPAR)) {
+               vpa_init(boot_cpuid);
++
++              if (lppaca_shared_proc(get_lppaca()))
++                      static_branch_enable(&shared_processor);
++
+               ppc_md.power_save = pseries_lpar_idle;
+               ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
+ #ifdef CONFIG_PCI_IOV
diff --git a/queue-5.4/series b/queue-5.4/series
index 12d4d2ef58196ffd55612e2a0a92553480fc3add..6bd7d9507977fc28e6dadba505e56744cf444632 100644 (file)
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -72,6 +72,8 @@ pinctrl-pinmux-fix-a-possible-null-pointer-in-pinmux.patch
 powerpc-ensure-that-swiotlb-buffer-is-allocated-from.patch
 btrfs-fix-error-messages-in-qgroup_rescan_init.patch
 btrfs-fix-cloning-range-with-a-hole-when-using-the-n.patch
+powerpc-vcpu-assume-dedicated-processors-as-non-preempt.patch
+powerpc-spinlocks-include-correct-header-for-static-key.patch
 btrfs-handle-error-in-btrfs_cache_block_group.patch
 btrfs-fix-hole-extent-items-with-a-zero-size-after-r.patch
 ocxl-fix-potential-memory-leak-on-context-creation.patch