From: Sasha Levin
Date: Fri, 28 Aug 2020 17:05:11 +0000 (-0400)
Subject: Fixes for 5.4
X-Git-Tag: v4.4.235~64
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=97ddc51850b077e99fdee6a808c34ac2a57703a6;p=thirdparty%2Fkernel%2Fstable-queue.git

Fixes for 5.4

Signed-off-by: Sasha Levin
---

diff --git a/queue-5.4/revert-ath10k-fix-dma-related-firmware-crashes-on-mu.patch b/queue-5.4/revert-ath10k-fix-dma-related-firmware-crashes-on-mu.patch
new file mode 100644
index 00000000000..dae83aea8ba
--- /dev/null
+++ b/queue-5.4/revert-ath10k-fix-dma-related-firmware-crashes-on-mu.patch
@@ -0,0 +1,51 @@
+From 52d3a736bde7ccb0648222bcad959954fb6750c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Tue, 14 Jan 2020 12:35:21 +0800
+Subject: Revert "ath10k: fix DMA related firmware crashes on multiple devices"
+
+From: Zhi Chen
+
+[ Upstream commit a1769bb68a850508a492e3674ab1e5e479b11254 ]
+
+This reverts commit 76d164f582150fd0259ec0fcbc485470bcd8033e.
+A PCIe hang was observed on multiple platforms. The issue was reproduced
+when the DUT was configured as an AP and associated with 50+ STAs.
+
+For QCA9984/QCA9888, the DMA_BURST_SIZE register controls the AXI burst
+size of the RD/WR access to the HOST MEM.
+0 - No split, the raw read/write transfer size from the MAC is put out
+    on the bus as the burst length
+1 - Split at 256 byte boundary
+2,3 - Reserved
+
+With a PCIe protocol analyzer, we can see DMA reads crossing the 4KB
+boundary when the issue happened. This violated the PCIe spec and caused
+the PCIe bus to get stuck. So revert the default value from 0 back to 1.
+
+Tested: IPQ8064 + QCA9984 with firmware 10.4-3.10-00047
+        QCS404 + QCA9984 with firmware 10.4-3.9.0.2--00044
+        Synaptics AS370 + QCA9888 with firmware 10.4-3.9.0.2--00040
+
+Signed-off-by: Zhi Chen
+Signed-off-by: Kalle Valo
+Signed-off-by: Sasha Levin
+---
+ drivers/net/wireless/ath/ath10k/hw.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h
+index 2ae57c1de7b55..ae4c9edc445c3 100644
+--- a/drivers/net/wireless/ath/ath10k/hw.h
++++ b/drivers/net/wireless/ath/ath10k/hw.h
+@@ -810,7 +810,7 @@ ath10k_is_rssi_enable(struct ath10k_hw_params *hw,
+ 
+ #define TARGET_10_4_TX_DBG_LOG_SIZE 1024
+ #define TARGET_10_4_NUM_WDS_ENTRIES 32
+-#define TARGET_10_4_DMA_BURST_SIZE 0
++#define TARGET_10_4_DMA_BURST_SIZE 1
+ #define TARGET_10_4_MAC_AGGR_DELIM 0
+ #define TARGET_10_4_RX_SKIP_DEFRAG_TIMEOUT_DUP_DETECTION_CHECK 1
+ #define TARGET_10_4_VOW_CONFIG 0
+-- 
+2.25.1
+
diff --git a/queue-5.4/sched-uclamp-fix-a-deadlock-when-enabling-uclamp-sta.patch b/queue-5.4/sched-uclamp-fix-a-deadlock-when-enabling-uclamp-sta.patch
new file mode 100644
index 00000000000..738ff1898fa
--- /dev/null
+++ b/queue-5.4/sched-uclamp-fix-a-deadlock-when-enabling-uclamp-sta.patch
@@ -0,0 +1,62 @@
+From 561aadbd76d8169d884befe321b8211dc29af1b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 28 Aug 2020 13:56:10 +0100
+Subject: sched/uclamp: Fix a deadlock when enabling uclamp static key
+
+From: Qais Yousef
+
+[ Upstream commit e65855a52b479f98674998cb23b21ef5a8144b04 ]
+
+The following splat was caught when setting the uclamp value of a task:
+
+  BUG: sleeping function called from invalid context at ./include/linux/percpu-rwsem.h:49
+
+  cpus_read_lock+0x68/0x130
+  static_key_enable+0x1c/0x38
+  __sched_setscheduler+0x900/0xad8
+
+Fix this by ensuring we enable the key outside of the critical section
+in __sched_setscheduler().
+
+Fixes: 46609ce22703 ("sched/uclamp: Protect uclamp fast path code with static key")
+Signed-off-by: Qais Yousef
+Signed-off-by: Peter Zijlstra (Intel)
+Link: https://lkml.kernel.org/r/20200716110347.19553-4-qais.yousef@arm.com
+Signed-off-by: Qais Yousef
+Signed-off-by: Sasha Levin
+---
+ kernel/sched/core.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index a8ab68aa189a9..352239c411a44 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1249,6 +1249,15 @@ static int uclamp_validate(struct task_struct *p,
+ 	if (upper_bound > SCHED_CAPACITY_SCALE)
+ 		return -EINVAL;
+ 
++	/*
++	 * We have valid uclamp attributes; make sure uclamp is enabled.
++	 *
++	 * We need to do that here, because enabling static branches is a
++	 * blocking operation which obviously cannot be done while holding
++	 * scheduler locks.
++	 */
++	static_branch_enable(&sched_uclamp_used);
++
+ 	return 0;
+ }
+ 
+@@ -1279,8 +1288,6 @@ static void __setscheduler_uclamp(struct task_struct *p,
+ 	if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
+ 		return;
+ 
+-	static_branch_enable(&sched_uclamp_used);
+-
+ 	if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
+ 		uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
+ 			      attr->sched_util_min, true);
+-- 
+2.25.1
+
diff --git a/queue-5.4/sched-uclamp-protect-uclamp-fast-path-code-with-stat.patch b/queue-5.4/sched-uclamp-protect-uclamp-fast-path-code-with-stat.patch
new file mode 100644
index 00000000000..c4517b8e36a
--- /dev/null
+++ b/queue-5.4/sched-uclamp-protect-uclamp-fast-path-code-with-stat.patch
@@ -0,0 +1,323 @@
+From 6e5f9f2ba50c50e42136661cf4dc09f42f763e91 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 28 Aug 2020 13:56:09 +0100
+Subject: sched/uclamp: Protect uclamp fast path code with static key
+
+From: Qais Yousef
+
+[ Upstream commit 46609ce227039fd192e0ecc7d940bed587fd2c78 ]
+
+There is a report that when uclamp is enabled, a netperf UDP test
+regresses compared to a kernel compiled without uclamp.
+
+https://lore.kernel.org/lkml/20200529100806.GA3070@suse.de/
+
+While investigating the root cause, there was no sign that the uclamp
+code was doing anything particularly expensive, but it could suffer
+from bad cache behavior under certain circumstances that are yet to be
+understood.
+
+https://lore.kernel.org/lkml/20200616110824.dgkkbyapn3io6wik@e107158-lin/
+
+To reduce the pressure on the fast path anyway, add a static key that
+by default will skip executing the uclamp logic in the
+enqueue/dequeue_task() fast path until it's needed.
+
+As soon as the user starts using util clamp by:
+
+  1. Changing the uclamp value of a task with sched_setattr()
+  2. Modifying the default sysctl_sched_util_clamp_{min, max}
+  3. Modifying the default cpu.uclamp.{min, max} value in cgroup
+
+we flip the static key, since the user has opted in to use util clamp,
+effectively re-introducing the uclamp logic in the enqueue/dequeue_task()
+fast path. It stays on from that point forward until the next reboot.
+
+This should help minimize the effect of util clamp on workloads that
+don't need it but still allow distros to ship their kernels with uclamp
+compiled in by default.
+
+The SCHED_WARN_ON() in uclamp_rq_dec_id() was removed since we can now
+end up with an unbalanced call to uclamp_rq_dec_id() if we flip the key
+while a task is running in the rq. Since we know it is harmless, we just
+quietly return if we attempt a uclamp_rq_dec_id() when
+rq->uclamp[].bucket[].tasks is 0.
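+
+A minimal sketch of the resulting guard in uclamp_rq_dec_id(), taken
+from the hunk further below (illustrative only):
+
+	/* No uclamp accounting was done at enqueue time; bail out. */
+	if (unlikely(!uc_se->active))
+		return;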
+
+In schedutil, we introduce a new uclamp_is_used() helper which takes the
+static key into account to ensure RT boosting behavior is retained.
+
+The following results demonstrate how this helps on a 2-socket Xeon E5
+system with 2x10 cores.
+
+                   nouclamp                 uclamp      uclamp-static-key
+Hmean     send-64         162.43 (   0.00%)      157.84 *  -2.82%*      163.39 *   0.59%*
+Hmean     send-128        324.71 (   0.00%)      314.78 *  -3.06%*      326.18 *   0.45%*
+Hmean     send-256        641.55 (   0.00%)      628.67 *  -2.01%*      648.12 *   1.02%*
+Hmean     send-1024      2525.28 (   0.00%)     2448.26 *  -3.05%*     2543.73 *   0.73%*
+Hmean     send-2048      4836.14 (   0.00%)     4712.08 *  -2.57%*     4867.69 *   0.65%*
+Hmean     send-3312      7540.83 (   0.00%)     7425.45 *  -1.53%*     7621.06 *   1.06%*
+Hmean     send-4096      9124.53 (   0.00%)     8948.82 *  -1.93%*     9276.25 *   1.66%*
+Hmean     send-8192     15589.67 (   0.00%)    15486.35 *  -0.66%*    15819.98 *   1.48%*
+Hmean     send-16384    26386.47 (   0.00%)    25752.25 *  -2.40%*    26773.74 *   1.47%*
+
+The perf diff between nouclamp and uclamp-static-key when uclamp is
+disabled in the fast path:
+
+     8.73%     -1.55%  [kernel.kallsyms]        [k] try_to_wake_up
+     0.07%     +0.04%  [kernel.kallsyms]        [k] deactivate_task
+     0.13%     -0.02%  [kernel.kallsyms]        [k] activate_task
+
+The diff between nouclamp and uclamp-static-key when uclamp is enabled
+in the fast path:
+
+     8.73%     -0.72%  [kernel.kallsyms]        [k] try_to_wake_up
+     0.13%     +0.39%  [kernel.kallsyms]        [k] activate_task
+     0.07%     +0.38%  [kernel.kallsyms]        [k] deactivate_task
+
+Fixes: 69842cba9ace ("sched/uclamp: Add CPU's clamp buckets refcounting")
+Reported-by: Mel Gorman
+Signed-off-by: Qais Yousef
+Signed-off-by: Peter Zijlstra (Intel)
+Tested-by: Lukasz Luba
+Link: https://lkml.kernel.org/r/20200630112123.12076-3-qais.yousef@arm.com
+[ Fix minor conflict with kernel/sched.h because of function renamed
+later ]
+Signed-off-by: Qais Yousef
+Signed-off-by: Sasha Levin
+---
+ kernel/sched/core.c              | 74 +++++++++++++++++++++++++++++++-
+ kernel/sched/cpufreq_schedutil.c |  2 +-
+ kernel/sched/sched.h             | 47 +++++++++++++++++++-
+ 3 files changed, 119 insertions(+), 4 deletions(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index b34b5c6e25248..a8ab68aa189a9 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -794,6 +794,26 @@ unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE;
+ /* All clamps are required to be less or equal than these values */
+ static struct uclamp_se uclamp_default[UCLAMP_CNT];
+ 
++/*
++ * This static key is used to reduce the uclamp overhead in the fast path. It
++ * primarily disables the call to uclamp_rq_{inc, dec}() in
++ * enqueue/dequeue_task().
++ *
++ * This allows users to continue to enable uclamp in their kernel config with
++ * minimum uclamp overhead in the fast path.
++ *
++ * As soon as userspace modifies any of the uclamp knobs, the static key is
++ * enabled, since we have actual users that make use of uclamp
++ * functionality.
++ *
++ * The knobs that would enable this static key are:
++ *
++ * * A task modifying its uclamp value with sched_setattr().
++ * * An admin modifying the sysctl_sched_uclamp_{min, max} via procfs.
++ * * An admin modifying the cgroup cpu.uclamp.{min, max}.
++ */
++DEFINE_STATIC_KEY_FALSE(sched_uclamp_used);
++
+ /* Integer rounded range for each bucket */
+ #define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS)
+ 
+@@ -990,10 +1010,38 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
+ 
+ 	lockdep_assert_held(&rq->lock);
+ 
++	/*
++	 * If sched_uclamp_used was enabled after task @p was enqueued,
++	 * we could end up with an unbalanced call to uclamp_rq_dec_id().
++	 *
++	 * In this case the uc_se->active flag should be false since no uclamp
++	 * accounting was performed at enqueue time and we can just return
++	 * here.
++	 *
++	 * Need to be careful of the following enqueue/dequeue ordering
++	 * problem too:
++	 *
++	 *	enqueue(taskA)
++	 *	// sched_uclamp_used gets enabled
++	 *	enqueue(taskB)
++	 *	dequeue(taskA)
++	 *	// Must not decrement bucket->tasks here
++	 *	dequeue(taskB)
++	 *
++	 * where we could end up with stale data in uc_se and
++	 * bucket[uc_se->bucket_id].
++	 *
++	 * The following check here eliminates the possibility of such a race.
++	 */
++	if (unlikely(!uc_se->active))
++		return;
++
+ 	bucket = &uc_rq->bucket[uc_se->bucket_id];
++
+ 	SCHED_WARN_ON(!bucket->tasks);
+ 	if (likely(bucket->tasks))
+ 		bucket->tasks--;
++
+ 	uc_se->active = false;
+ 
+ 	/*
+@@ -1021,6 +1069,15 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
+ {
+ 	enum uclamp_id clamp_id;
+ 
++	/*
++	 * Avoid any overhead until uclamp is actually used by userspace.
++	 *
++	 * The condition is constructed such that a NOP is generated when
++	 * sched_uclamp_used is disabled.
++	 */
++	if (!static_branch_unlikely(&sched_uclamp_used))
++		return;
++
+ 	if (unlikely(!p->sched_class->uclamp_enabled))
+ 		return;
+ 
+@@ -1036,6 +1093,15 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
+ {
+ 	enum uclamp_id clamp_id;
+ 
++	/*
++	 * Avoid any overhead until uclamp is actually used by userspace.
++	 *
++	 * The condition is constructed such that a NOP is generated when
++	 * sched_uclamp_used is disabled.
++	 */
++	if (!static_branch_unlikely(&sched_uclamp_used))
++		return;
++
+ 	if (unlikely(!p->sched_class->uclamp_enabled))
+ 		return;
+ 
+@@ -1145,8 +1211,10 @@ int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
+ 		update_root_tg = true;
+ 	}
+ 
+-	if (update_root_tg)
++	if (update_root_tg) {
++		static_branch_enable(&sched_uclamp_used);
+ 		uclamp_update_root_tg();
++	}
+ 
+ 	/*
+ 	 * We update all RUNNABLE tasks only when task groups are in use.
+@@ -1211,6 +1279,8 @@ static void __setscheduler_uclamp(struct task_struct *p,
+ 	if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
+ 		return;
+ 
++	static_branch_enable(&sched_uclamp_used);
++
+ 	if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
+ 		uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
+ 			      attr->sched_util_min, true);
+@@ -7294,6 +7364,8 @@ static ssize_t cpu_uclamp_write(struct kernfs_open_file *of, char *buf,
+ 	if (req.ret)
+ 		return req.ret;
+ 
++	static_branch_enable(&sched_uclamp_used);
++
+ 	mutex_lock(&uclamp_mutex);
+ 	rcu_read_lock();
+ 
+diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
+index b6f56e7c8dd16..4cb80e6042c4f 100644
+--- a/kernel/sched/cpufreq_schedutil.c
++++ b/kernel/sched/cpufreq_schedutil.c
+@@ -210,7 +210,7 @@ unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
+ 	unsigned long dl_util, util, irq;
+ 	struct rq *rq = cpu_rq(cpu);
+ 
+-	if (!IS_BUILTIN(CONFIG_UCLAMP_TASK) &&
++	if (!uclamp_is_used() &&
+ 	    type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) {
+ 		return max;
+ 	}
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 570659f1c6e22..9f2a9e34a78d5 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -841,6 +841,8 @@ struct uclamp_rq {
+ 	unsigned int value;
+ 	struct uclamp_bucket bucket[UCLAMP_BUCKETS];
+ };
++
++DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
+ #endif /* CONFIG_UCLAMP_TASK */
+ 
+ /*
+@@ -2319,12 +2321,35 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
+ #ifdef CONFIG_UCLAMP_TASK
+ unsigned int uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
+ 
++/**
++ * uclamp_util_with - clamp @util with @rq and @p effective uclamp values.
++ * @rq: The rq to clamp against. Must not be NULL.
++ * @util: The util value to clamp.
++ * @p: The task to clamp against. Can be NULL if you want to clamp
++ *     against @rq only.
++ *
++ * Clamps the passed @util to the max(@rq, @p) effective uclamp values.
++ *
++ * If the sched_uclamp_used static key is disabled, then just return the util
++ * without any clamping since uclamp aggregation at the rq level in the fast
++ * path is disabled, rendering this operation a NOP.
++ *
++ * Use uclamp_eff_value() if you don't care about uclamp values at rq level. It
++ * will return the correct effective uclamp value of the task even if the
++ * static key is disabled.
++ */
+ static __always_inline
+ unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
+ 			      struct task_struct *p)
+ {
+-	unsigned int min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
+-	unsigned int max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
++	unsigned int min_util;
++	unsigned int max_util;
++
++	if (!static_branch_likely(&sched_uclamp_used))
++		return util;
++
++	min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
++	max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
+ 
+ 	if (p) {
+ 		min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN));
+@@ -2346,6 +2371,19 @@ static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
+ {
+ 	return uclamp_util_with(rq, util, NULL);
+ }
++
++/*
++ * When uclamp is compiled in, the aggregation at rq level is 'turned off'
++ * by default in the fast path and only gets turned on once userspace performs
++ * an operation that requires it.
++ *
++ * Returns true if userspace opted in to use uclamp, and hence aggregation
++ * at the rq level is active.
++ */ ++static inline bool uclamp_is_used(void) ++{ ++ return static_branch_likely(&sched_uclamp_used); ++} + #else /* CONFIG_UCLAMP_TASK */ + static inline unsigned int uclamp_util_with(struct rq *rq, unsigned int util, + struct task_struct *p) +@@ -2356,6 +2394,11 @@ static inline unsigned int uclamp_util(struct rq *rq, unsigned int util) + { + return util; + } ++ ++static inline bool uclamp_is_used(void) ++{ ++ return false; ++} + #endif /* CONFIG_UCLAMP_TASK */ + + #ifdef arch_scale_freq_capacity +-- +2.25.1 + diff --git a/queue-5.4/series b/queue-5.4/series index 08706ce898e..ba70b0b35fa 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -64,3 +64,6 @@ usb-sisusbvga-fix-a-potential-ub-casued-by-left-shif.patch brcmfmac-set-timeout-value-when-configuring-power-sa.patch efi-provide-empty-efi_enter_virtual_mode-implementat.patch arm64-fix-__cpu_logical_map-undefined-issue.patch +revert-ath10k-fix-dma-related-firmware-crashes-on-mu.patch +sched-uclamp-protect-uclamp-fast-path-code-with-stat.patch +sched-uclamp-fix-a-deadlock-when-enabling-uclamp-sta.patch