From de3bf0284affa40108b97bc4d90f469fe60a949c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 10 Jan 2020 09:00:22 +0100 Subject: [PATCH] 5.4-stable patches added patches: powerpc-spinlocks-include-correct-header-for-static-key.patch powerpc-vcpu-assume-dedicated-processors-as-non-preempt.patch --- ...nclude-correct-header-for-static-key.patch | 45 +++++ ...-dedicated-processors-as-non-preempt.patch | 160 ++++++++++++++++++ queue-5.4/series | 2 + 3 files changed, 207 insertions(+) create mode 100644 queue-5.4/powerpc-spinlocks-include-correct-header-for-static-key.patch create mode 100644 queue-5.4/powerpc-vcpu-assume-dedicated-processors-as-non-preempt.patch diff --git a/queue-5.4/powerpc-spinlocks-include-correct-header-for-static-key.patch b/queue-5.4/powerpc-spinlocks-include-correct-header-for-static-key.patch new file mode 100644 index 00000000000..53a46f82a7f --- /dev/null +++ b/queue-5.4/powerpc-spinlocks-include-correct-header-for-static-key.patch @@ -0,0 +1,45 @@ +From 6da3eced8c5f3b03340b0c395bacd552c4d52411 Mon Sep 17 00:00:00 2001 +From: "Jason A. Donenfeld" +Date: Mon, 23 Dec 2019 14:31:47 +0100 +Subject: powerpc/spinlocks: Include correct header for static key +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jason A. Donenfeld + +commit 6da3eced8c5f3b03340b0c395bacd552c4d52411 upstream. + +Recently, the spinlock implementation grew a static key optimization, +but the jump_label.h header include was left out, leading to build +errors: + + linux/arch/powerpc/include/asm/spinlock.h:44:7: error: implicit declaration of function ‘static_branch_unlikely’ + 44 | if (!static_branch_unlikely(&shared_processor)) + +This commit adds the missing header. + +mpe: The build break is only seen with CONFIG_JUMP_LABEL=n. + +Fixes: 656c21d6af5d ("powerpc/shared: Use static key to detect shared processor") +Signed-off-by: Jason A. 
Donenfeld +Reviewed-by: Srikar Dronamraju +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20191223133147.129983-1-Jason@zx2c4.com +Cc: Srikar Dronamraju +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/include/asm/spinlock.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/powerpc/include/asm/spinlock.h ++++ b/arch/powerpc/include/asm/spinlock.h +@@ -15,6 +15,7 @@ + * + * (the type definitions are in asm/spinlock_types.h) + */ ++#include <linux/jump_label.h> + #include <linux/irqflags.h> + #ifdef CONFIG_PPC64 + #include <asm/paca.h> diff --git a/queue-5.4/powerpc-vcpu-assume-dedicated-processors-as-non-preempt.patch b/queue-5.4/powerpc-vcpu-assume-dedicated-processors-as-non-preempt.patch new file mode 100644 index 00000000000..00099c81d60 --- /dev/null +++ b/queue-5.4/powerpc-vcpu-assume-dedicated-processors-as-non-preempt.patch @@ -0,0 +1,160 @@ +From 14c73bd344da60abaf7da3ea2e7733ddda35bbac Mon Sep 17 00:00:00 2001 +From: Srikar Dronamraju +Date: Thu, 5 Dec 2019 14:02:17 +0530 +Subject: powerpc/vcpu: Assume dedicated processors as non-preempt + +From: Srikar Dronamraju + +commit 14c73bd344da60abaf7da3ea2e7733ddda35bbac upstream. + +With commit 247f2f6f3c70 ("sched/core: Don't schedule threads on +pre-empted vCPUs"), the scheduler avoids preempted vCPUs to schedule +tasks on wakeup. This leads to wrong choice of CPU, which in-turn +leads to larger wakeup latencies. Eventually, it leads to performance +regression in latency sensitive benchmarks like soltp, schbench etc. + +On Powerpc, vcpu_is_preempted() only looks at yield_count. If the +yield_count is odd, the vCPU is assumed to be preempted. However +yield_count is increased whenever the LPAR enters CEDE state (idle). +So any CPU that has entered CEDE state is assumed to be preempted. + +Even if vCPU of dedicated LPAR is preempted/donated, it should have +right of first-use since they are supposed to own the vCPU. 
+ +On a Power9 System with 32 cores: + # lscpu + Architecture: ppc64le + Byte Order: Little Endian + CPU(s): 128 + On-line CPU(s) list: 0-127 + Thread(s) per core: 8 + Core(s) per socket: 1 + Socket(s): 16 + NUMA node(s): 2 + Model: 2.2 (pvr 004e 0202) + Model name: POWER9 (architected), altivec supported + Hypervisor vendor: pHyp + Virtualization type: para + L1d cache: 32K + L1i cache: 32K + L2 cache: 512K + L3 cache: 10240K + NUMA node0 CPU(s): 0-63 + NUMA node1 CPU(s): 64-127 + + # perf stat -a -r 5 ./schbench + v5.4 v5.4 + patch + Latency percentiles (usec) Latency percentiles (usec) + 50.0000th: 45 50.0th: 45 + 75.0000th: 62 75.0th: 63 + 90.0000th: 71 90.0th: 74 + 95.0000th: 77 95.0th: 78 + *99.0000th: 91 *99.0th: 82 + 99.5000th: 707 99.5th: 83 + 99.9000th: 6920 99.9th: 86 + min=0, max=10048 min=0, max=96 + Latency percentiles (usec) Latency percentiles (usec) + 50.0000th: 45 50.0th: 46 + 75.0000th: 61 75.0th: 64 + 90.0000th: 72 90.0th: 75 + 95.0000th: 79 95.0th: 79 + *99.0000th: 691 *99.0th: 83 + 99.5000th: 3972 99.5th: 85 + 99.9000th: 8368 99.9th: 91 + min=0, max=16606 min=0, max=117 + Latency percentiles (usec) Latency percentiles (usec) + 50.0000th: 45 50.0th: 46 + 75.0000th: 61 75.0th: 64 + 90.0000th: 71 90.0th: 75 + 95.0000th: 77 95.0th: 79 + *99.0000th: 106 *99.0th: 83 + 99.5000th: 2364 99.5th: 84 + 99.9000th: 7480 99.9th: 90 + min=0, max=10001 min=0, max=95 + Latency percentiles (usec) Latency percentiles (usec) + 50.0000th: 45 50.0th: 47 + 75.0000th: 62 75.0th: 65 + 90.0000th: 72 90.0th: 75 + 95.0000th: 78 95.0th: 79 + *99.0000th: 93 *99.0th: 84 + 99.5000th: 108 99.5th: 85 + 99.9000th: 6792 99.9th: 90 + min=0, max=17681 min=0, max=117 + Latency percentiles (usec) Latency percentiles (usec) + 50.0000th: 46 50.0th: 45 + 75.0000th: 62 75.0th: 64 + 90.0000th: 73 90.0th: 75 + 95.0000th: 79 95.0th: 79 + *99.0000th: 113 *99.0th: 82 + 99.5000th: 2724 99.5th: 83 + 99.9000th: 6184 99.9th: 93 + min=0, max=9887 min=0, max=111 + + Performance counter stats for 
'system wide' (5 runs): + + context-switches 43,373 ( +- 0.40% ) 44,597 ( +- 0.55% ) + cpu-migrations 1,211 ( +- 5.04% ) 220 ( +- 6.23% ) + page-faults 15,983 ( +- 5.21% ) 15,360 ( +- 3.38% ) + +Waiman Long suggested using static_keys. + +Fixes: 247f2f6f3c70 ("sched/core: Don't schedule threads on pre-empted vCPUs") +Cc: stable@vger.kernel.org # v4.18+ +Reported-by: Parth Shah +Reported-by: Ihor Pasichnyk +Tested-by: Juri Lelli +Acked-by: Waiman Long +Reviewed-by: Gautham R. Shenoy +Signed-off-by: Srikar Dronamraju +Acked-by: Phil Auld +Reviewed-by: Vaidyanathan Srinivasan +Tested-by: Parth Shah +[mpe: Move the key and setting of the key to pseries/setup.c] +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20191213035036.6913-1-mpe@ellerman.id.au +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/include/asm/spinlock.h | 4 +++- + arch/powerpc/platforms/pseries/setup.c | 7 +++++++ + 2 files changed, 10 insertions(+), 1 deletion(-) + +--- a/arch/powerpc/include/asm/spinlock.h ++++ b/arch/powerpc/include/asm/spinlock.h +@@ -36,10 +36,12 @@ + #endif + + #ifdef CONFIG_PPC_PSERIES ++DECLARE_STATIC_KEY_FALSE(shared_processor); ++ + #define vcpu_is_preempted vcpu_is_preempted + static inline bool vcpu_is_preempted(int cpu) + { +- if (!firmware_has_feature(FW_FEATURE_SPLPAR)) ++ if (!static_branch_unlikely(&shared_processor)) + return false; + return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1); + } +--- a/arch/powerpc/platforms/pseries/setup.c ++++ b/arch/powerpc/platforms/pseries/setup.c +@@ -74,6 +74,9 @@ + #include "pseries.h" + #include "../../../../drivers/pci/pci.h" + ++DEFINE_STATIC_KEY_FALSE(shared_processor); ++EXPORT_SYMBOL_GPL(shared_processor); ++ + int CMO_PrPSP = -1; + int CMO_SecPSP = -1; + unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K); +@@ -758,6 +761,10 @@ static void __init pSeries_setup_arch(vo + + if (firmware_has_feature(FW_FEATURE_LPAR)) { + vpa_init(boot_cpuid); ++ ++ if (lppaca_shared_proc(get_lppaca())) 
++ static_branch_enable(&shared_processor); ++ + ppc_md.power_save = pseries_lpar_idle; + ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; + #ifdef CONFIG_PCI_IOV diff --git a/queue-5.4/series b/queue-5.4/series index 12d4d2ef581..6bd7d950797 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -72,6 +72,8 @@ pinctrl-pinmux-fix-a-possible-null-pointer-in-pinmux.patch powerpc-ensure-that-swiotlb-buffer-is-allocated-from.patch btrfs-fix-error-messages-in-qgroup_rescan_init.patch btrfs-fix-cloning-range-with-a-hole-when-using-the-n.patch +powerpc-vcpu-assume-dedicated-processors-as-non-preempt.patch +powerpc-spinlocks-include-correct-header-for-static-key.patch btrfs-handle-error-in-btrfs_cache_block_group.patch btrfs-fix-hole-extent-items-with-a-zero-size-after-r.patch ocxl-fix-potential-memory-leak-on-context-creation.patch -- 2.47.3