From: Sasha Levin Date: Thu, 12 Dec 2024 01:10:55 +0000 (-0500) Subject: Fixes for 5.10 X-Git-Tag: v5.4.287~60 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=663f268f23a9beb95f96c2cf589d89890ebf5a37;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.10 Signed-off-by: Sasha Levin --- diff --git a/queue-5.10/btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch b/queue-5.10/btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch new file mode 100644 index 00000000000..fbd7a370d99 --- /dev/null +++ b/queue-5.10/btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch @@ -0,0 +1,41 @@ +From 9f81587b787fcc2527b251a16f67e9581b852263 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Nov 2024 13:33:03 +0000 +Subject: btrfs: fix missing snapshot drew unlock when root is dead during swap + activation + +From: Filipe Manana + +[ Upstream commit 9c803c474c6c002d8ade68ebe99026cc39c37f85 ] + +When activating a swap file we acquire the root's snapshot drew lock and +then check if the root is dead, failing and returning with -EPERM if it's +dead but without unlocking the root's snapshot lock. Fix this by adding +the missing unlock. 
+ +Fixes: 60021bd754c6 ("btrfs: prevent subvol with swapfile from being deleted") +Reviewed-by: Johannes Thumshirn +Reviewed-by: David Sterba +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/inode.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index 24598acb9a314..eba87f2936d2c 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -10283,6 +10283,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, + if (btrfs_root_dead(root)) { + spin_unlock(&root->root_item_lock); + ++ btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_exclop_finish(fs_info); + btrfs_warn(fs_info, + "cannot activate swapfile because subvolume %llu is being deleted", +-- +2.43.0 + diff --git a/queue-5.10/sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch b/queue-5.10/sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch new file mode 100644 index 00000000000..0b4c2b422c5 --- /dev/null +++ b/queue-5.10/sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch @@ -0,0 +1,71 @@ +From 96b450db25e41072ecaa1219b75be13cf1c7ace9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Nov 2024 05:44:32 +0000 +Subject: sched/core: Prevent wakeup of ksoftirqd during idle load balance + +From: K Prateek Nayak + +[ Upstream commit e932c4ab38f072ce5894b2851fea8bc5754bb8e5 ] + +Scheduler raises a SCHED_SOFTIRQ to trigger a load balancing event +from the IPI handler on the idle CPU. If the SMP function is invoked +from an idle CPU via flush_smp_call_function_queue() then the HARD-IRQ +flag is not set and raise_softirq_irqoff() needlessly wakes ksoftirqd +because soft interrupts are handled before ksoftirqd gets on the CPU.
+ +Adding a trace_printk() in nohz_csd_func() at the spot of raising +SCHED_SOFTIRQ and enabling trace events for sched_switch, sched_wakeup, +and softirq_entry (for SCHED_SOFTIRQ vector alone) helps observing the +current behavior: + + -0 [000] dN.1.: nohz_csd_func: Raising SCHED_SOFTIRQ from nohz_csd_func + -0 [000] dN.4.: sched_wakeup: comm=ksoftirqd/0 pid=16 prio=120 target_cpu=000 + -0 [000] .Ns1.: softirq_entry: vec=7 [action=SCHED] + -0 [000] .Ns1.: softirq_exit: vec=7 [action=SCHED] + -0 [000] d..2.: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=ksoftirqd/0 next_pid=16 next_prio=120 + ksoftirqd/0-16 [000] d..2.: sched_switch: prev_comm=ksoftirqd/0 prev_pid=16 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120 + ... + +Use __raise_softirq_irqoff() to raise the softirq. The SMP function call +is always invoked on the requested CPU in an interrupt handler. It is +guaranteed that soft interrupts are handled at the end. + +Following are the observations with the changes when enabling the same +set of events: + + -0 [000] dN.1.: nohz_csd_func: Raising SCHED_SOFTIRQ for nohz_idle_balance + -0 [000] dN.1.: softirq_raise: vec=7 [action=SCHED] + -0 [000] .Ns1.: softirq_entry: vec=7 [action=SCHED] + +No unnecessary ksoftirqd wakeups are seen from idle task's context to +service the softirq. 
+ +Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()") +Closes: https://lore.kernel.org/lkml/fcf823f-195e-6c9a-eac3-25f870cb35ac@inria.fr/ [1] +Reported-by: Julia Lawall +Suggested-by: Sebastian Andrzej Siewior +Signed-off-by: K Prateek Nayak +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Sebastian Andrzej Siewior +Link: https://lore.kernel.org/r/20241119054432.6405-5-kprateek.nayak@amd.com +Signed-off-by: Sasha Levin +--- + kernel/sched/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 1f4bf91c27d22..7cf45d506688c 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -752,7 +752,7 @@ static void nohz_csd_func(void *info) + rq->idle_balance = idle_cpu(cpu); + if (rq->idle_balance) { + rq->nohz_idle_balance = flags; +- raise_softirq_irqoff(SCHED_SOFTIRQ); ++ __raise_softirq_irqoff(SCHED_SOFTIRQ); + } + } + +-- +2.43.0 + diff --git a/queue-5.10/sched-core-remove-the-unnecessary-need_resched-check.patch b/queue-5.10/sched-core-remove-the-unnecessary-need_resched-check.patch new file mode 100644 index 00000000000..9c91214552e --- /dev/null +++ b/queue-5.10/sched-core-remove-the-unnecessary-need_resched-check.patch @@ -0,0 +1,122 @@ +From c6729d6eac04e2570b1043dd8322be448456c93a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Nov 2024 05:44:30 +0000 +Subject: sched/core: Remove the unnecessary need_resched() check in + nohz_csd_func() + +From: K Prateek Nayak + +[ Upstream commit ea9cffc0a154124821531991d5afdd7e8b20d7aa ] + +The need_resched() check currently in nohz_csd_func() can be tracked +to have been added in scheduler_ipi() back in 2011 via commit +ca38062e57e9 ("sched: Use resched IPI to kick off the nohz idle balance") + +Since then, it has travelled quite a bit but it seems like an idle_cpu() +check currently is sufficient to detect the need to bail out from an +idle load balancing. 
To justify this removal, consider all the following +cases where an idle load balancing could race with a task wakeup: + +o Since commit f3dd3f674555b ("sched: Remove the limitation of WF_ON_CPU + on wakelist if wakee cpu is idle") a target perceived to be idle + (target_rq->nr_running == 0) will return true for + ttwu_queue_cond(target) which will offload the task wakeup to the idle + target via an IPI. + + In all such cases target_rq->ttwu_pending will be set to 1 before + queuing the wake function. + + If an idle load balance races here, following scenarios are possible: + + - The CPU is not in TIF_POLLING_NRFLAG mode in which case an actual + IPI is sent to the CPU to wake it out of idle. If the + nohz_csd_func() queues before sched_ttwu_pending(), the idle load + balance will bail out since idle_cpu(target) returns 0 since + target_rq->ttwu_pending is 1. If the nohz_csd_func() is queued after + sched_ttwu_pending() it should see rq->nr_running to be non-zero and + bail out of idle load balancing. + + - The CPU is in TIF_POLLING_NRFLAG mode and instead of an actual IPI, + the sender will simply set TIF_NEED_RESCHED for the target to put it + out of idle and flush_smp_call_function_queue() in do_idle() will + execute the call function. Depending on the ordering of the queuing + of nohz_csd_func() and sched_ttwu_pending(), the idle_cpu() check in + nohz_csd_func() should either see target_rq->ttwu_pending = 1 or + target_rq->nr_running to be non-zero if there is a genuine task + wakeup racing with the idle load balance kick. + +o The waker CPU perceives the target CPU to be busy + (target_rq->nr_running != 0) but the CPU is in fact going idle and due + to a series of unfortunate events, the system reaches a case where the + waker CPU decides to perform the wakeup by itself in ttwu_queue() on + the target CPU but target is concurrently selected for idle load + balance (XXX: Can this happen?
I'm not sure, but we'll consider the + mother of all coincidences to estimate the worst case scenario). + + ttwu_do_activate() calls enqueue_task() which would increment + "rq->nr_running" post which it calls wakeup_preempt() which is + responsible for setting TIF_NEED_RESCHED (via a resched IPI or by + setting TIF_NEED_RESCHED on a TIF_POLLING_NRFLAG idle CPU). The key + thing to note in this case is that rq->nr_running is already non-zero + in case of a wakeup before TIF_NEED_RESCHED is set which would + lead to idle_cpu() check returning false. + +In all cases, it seems that need_resched() check is unnecessary when +checking for idle_cpu() first since an impending wakeup racing with idle +load balancer will either set the "rq->ttwu_pending" or indicate a newly +woken task via "rq->nr_running". + +Chasing the reason why this check might have existed in the first place, +I came across Peter's suggestion on the first iteration of Suresh's +patch from 2011 [1] where the condition to raise the SCHED_SOFTIRQ was: + + sched_ttwu_do_pending(list); + + if (unlikely((rq->idle == current) && + rq->nohz_balance_kick && + !need_resched())) + raise_softirq_irqoff(SCHED_SOFTIRQ); + +Since the condition to raise the SCHED_SOFTIRQ was preceded by +sched_ttwu_do_pending() (which is the equivalent of sched_ttwu_pending()) in +the current upstream kernel, the need_resched() check was necessary to +catch a newly queued task. Peter suggested modifying it to: + + if (idle_cpu() && rq->nohz_balance_kick && !need_resched()) + raise_softirq_irqoff(SCHED_SOFTIRQ); + +where idle_cpu() seems to have replaced "rq->idle == current" check. + +Even back then, the idle_cpu() check would have been sufficient to catch +a new task being enqueued. Since commit b2a02fc43a1f ("smp: Optimize +send_call_function_single_ipi()") overloads the interpretation of +TIF_NEED_RESCHED for TIF_POLLING_NRFLAG idling, remove the +need_resched() check in nohz_csd_func() to raise SCHED_SOFTIRQ based +on Peter's suggestion.
+ +Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()") +Suggested-by: Peter Zijlstra +Signed-off-by: K Prateek Nayak +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20241119054432.6405-3-kprateek.nayak@amd.com +Signed-off-by: Sasha Levin +--- + kernel/sched/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 29d8fc3a7bbd2..8e30041cecf94 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -750,7 +750,7 @@ static void nohz_csd_func(void *info) + WARN_ON(!(flags & NOHZ_KICK_MASK)); + + rq->idle_balance = idle_cpu(cpu); +- if (rq->idle_balance && !need_resched()) { ++ if (rq->idle_balance) { + rq->nohz_idle_balance = flags; + raise_softirq_irqoff(SCHED_SOFTIRQ); + } +-- +2.43.0 + diff --git a/queue-5.10/sched-fair-add-nohz-balancer-flag-for-nohz.next_bala.patch b/queue-5.10/sched-fair-add-nohz-balancer-flag-for-nohz.next_bala.patch new file mode 100644 index 00000000000..fc30ad89001 --- /dev/null +++ b/queue-5.10/sched-fair-add-nohz-balancer-flag-for-nohz.next_bala.patch @@ -0,0 +1,147 @@ +From 5cd103305979a38803302ff2901d488f34ad2eb1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 23 Aug 2021 12:16:59 +0100 +Subject: sched/fair: Add NOHZ balancer flag for nohz.next_balance updates + +From: Valentin Schneider + +[ Upstream commit efd984c481abb516fab8bafb25bf41fd9397a43c ] + +A following patch will trigger NOHZ idle balances as a means to update +nohz.next_balance. Vincent noted that blocked load updates can have +non-negligible overhead, which should be avoided if the intent is to only +update nohz.next_balance. + +Add a new NOHZ balance kick flag, NOHZ_NEXT_KICK. Gate NOHZ blocked load +update by the presence of NOHZ_STATS_KICK - currently all NOHZ balance +kicks will have the NOHZ_STATS_KICK flag set, so no change in behaviour is +expected. 
+ +Suggested-by: Vincent Guittot +Signed-off-by: Valentin Schneider +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Vincent Guittot +Link: https://lkml.kernel.org/r/20210823111700.2842997-2-valentin.schneider@arm.com +Stable-dep-of: ff47a0acfcce ("sched/fair: Check idle_cpu() before need_resched() to detect ilb CPU turning busy") +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 24 ++++++++++++++---------- + kernel/sched/sched.h | 8 +++++++- + 2 files changed, 21 insertions(+), 11 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 8121cfd60b8fb..e2116e3d593ec 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -10438,7 +10438,7 @@ static void nohz_balancer_kick(struct rq *rq) + goto out; + + if (rq->nr_running >= 2) { +- flags = NOHZ_KICK_MASK; ++ flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK; + goto out; + } + +@@ -10452,7 +10452,7 @@ static void nohz_balancer_kick(struct rq *rq) + * on. + */ + if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) { +- flags = NOHZ_KICK_MASK; ++ flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK; + goto unlock; + } + } +@@ -10466,7 +10466,7 @@ static void nohz_balancer_kick(struct rq *rq) + */ + for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) { + if (sched_asym_prefer(i, cpu)) { +- flags = NOHZ_KICK_MASK; ++ flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK; + goto unlock; + } + } +@@ -10479,7 +10479,7 @@ static void nohz_balancer_kick(struct rq *rq) + * to run the misfit task on. 
+ */ + if (check_misfit_status(rq, sd)) { +- flags = NOHZ_KICK_MASK; ++ flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK; + goto unlock; + } + +@@ -10506,7 +10506,7 @@ static void nohz_balancer_kick(struct rq *rq) + */ + nr_busy = atomic_read(&sds->nr_busy_cpus); + if (nr_busy > 1) { +- flags = NOHZ_KICK_MASK; ++ flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK; + goto unlock; + } + } +@@ -10653,7 +10653,8 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags, + * setting the flag, we are sure to not clear the state and not + * check the load of an idle cpu. + */ +- WRITE_ONCE(nohz.has_blocked, 0); ++ if (flags & NOHZ_STATS_KICK) ++ WRITE_ONCE(nohz.has_blocked, 0); + + /* + * Ensures that if we miss the CPU, we must see the has_blocked +@@ -10675,13 +10676,15 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags, + * balancing owner will pick it up. + */ + if (need_resched()) { +- has_blocked_load = true; ++ if (flags & NOHZ_STATS_KICK) ++ has_blocked_load = true; + goto abort; + } + + rq = cpu_rq(balance_cpu); + +- has_blocked_load |= update_nohz_stats(rq); ++ if (flags & NOHZ_STATS_KICK) ++ has_blocked_load |= update_nohz_stats(rq); + + /* + * If time for next balance is due, +@@ -10712,8 +10715,9 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags, + if (likely(update_next_balance)) + nohz.next_balance = next_balance; + +- WRITE_ONCE(nohz.next_blocked, +- now + msecs_to_jiffies(LOAD_AVG_PERIOD)); ++ if (flags & NOHZ_STATS_KICK) ++ WRITE_ONCE(nohz.next_blocked, ++ now + msecs_to_jiffies(LOAD_AVG_PERIOD)); + + /* The full idle balance loop has been done */ + ret = true; +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 66e3ecb7c10e4..5f17507bd66b8 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -2293,12 +2293,18 @@ extern void cfs_bandwidth_usage_dec(void); + #define NOHZ_BALANCE_KICK_BIT 0 + #define NOHZ_STATS_KICK_BIT 1 + #define NOHZ_NEWILB_KICK_BIT 2 ++#define NOHZ_NEXT_KICK_BIT 
3 + ++/* Run rebalance_domains() */ + #define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT) ++/* Update blocked load */ + #define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT) ++/* Update blocked load when entering idle */ + #define NOHZ_NEWILB_KICK BIT(NOHZ_NEWILB_KICK_BIT) ++/* Update nohz.next_balance */ ++#define NOHZ_NEXT_KICK BIT(NOHZ_NEXT_KICK_BIT) + +-#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK) ++#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK | NOHZ_NEXT_KICK) + + #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) + +-- +2.43.0 + diff --git a/queue-5.10/sched-fair-check-idle_cpu-before-need_resched-to-det.patch b/queue-5.10/sched-fair-check-idle_cpu-before-need_resched-to-det.patch new file mode 100644 index 00000000000..f9bad566700 --- /dev/null +++ b/queue-5.10/sched-fair-check-idle_cpu-before-need_resched-to-det.patch @@ -0,0 +1,60 @@ +From 5f41ec38490ab87b8f4281161d5ab93e32a3baeb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Nov 2024 05:44:31 +0000 +Subject: sched/fair: Check idle_cpu() before need_resched() to detect ilb CPU + turning busy + +From: K Prateek Nayak + +[ Upstream commit ff47a0acfcce309cf9e175149c75614491953c8f ] + +Commit b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()") +optimizes IPIs to idle CPUs in TIF_POLLING_NRFLAG mode by setting the +TIF_NEED_RESCHED flag in idle task's thread info and relying on +flush_smp_call_function_queue() in idle exit path to run the +call-function. A softirq raised by the call-function is handled shortly +after in do_softirq_post_smp_call_flush() but the TIF_NEED_RESCHED flag +remains set and is only cleared later when schedule_idle() calls +__schedule(). + +need_resched() check in _nohz_idle_balance() exists to bail out of load +balancing if another task has woken up on the CPU currently in-charge of +idle load balancing which is being processed in SCHED_SOFTIRQ context. 
+Since the optimization mentioned above overloads the interpretation of +TIF_NEED_RESCHED, check for idle_cpu() before going with the existing +need_resched() check which can catch a genuine task wakeup on an idle +CPU processing SCHED_SOFTIRQ from do_softirq_post_smp_call_flush(), as +well as the case where ksoftirqd needs to be preempted as a result of +new task wakeup or slice expiry. + +In case of PREEMPT_RT or threadirqs, although the idle load balancing +may be inhibited in some cases on the ilb CPU, the fact that ksoftirqd +is the only fair task going back to sleep will trigger a newidle balance +on the CPU which will alleviate some imbalance if it exists if idle +balance fails to do so. + +Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()") +Signed-off-by: K Prateek Nayak +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20241119054432.6405-4-kprateek.nayak@amd.com +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index e2116e3d593ec..9f8cb265589b3 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -10675,7 +10675,7 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags, + * work being done for other CPUs. Next load + * balancing owner will pick it up. 
+ */ +- if (need_resched()) { ++ if (!idle_cpu(this_cpu) && need_resched()) { + if (flags & NOHZ_STATS_KICK) + has_blocked_load = true; + goto abort; +-- +2.43.0 + diff --git a/queue-5.10/sched-fair-merge-for-each-idle-cpu-loop-of-ilb.patch b/queue-5.10/sched-fair-merge-for-each-idle-cpu-loop-of-ilb.patch new file mode 100644 index 00000000000..9a15e4fd237 --- /dev/null +++ b/queue-5.10/sched-fair-merge-for-each-idle-cpu-loop-of-ilb.patch @@ -0,0 +1,91 @@ +From 2c353fb77d9fe2fbdb486d7ae22236f70374e129 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 24 Feb 2021 14:30:04 +0100 +Subject: sched/fair: Merge for each idle cpu loop of ILB + +From: Vincent Guittot + +[ Upstream commit 7a82e5f52a3506bc35a4dc04d53ad2c9daf82e7f ] + +Remove the specific case for handling this_cpu outside for_each_cpu() loop +when running ILB. Instead we use for_each_cpu_wrap() and start with the +next cpu after this_cpu so we will continue to finish with this_cpu. + +update_nohz_stats() is now used for this_cpu too and will prevent +unnecessary updates. We don't need a special case for handling the update of +nohz.next_balance for this_cpu anymore because it is now handled by the +loop like others.
+ +Signed-off-by: Vincent Guittot +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Ingo Molnar +Reviewed-by: Valentin Schneider +Link: https://lkml.kernel.org/r/20210224133007.28644-5-vincent.guittot@linaro.org +Stable-dep-of: ff47a0acfcce ("sched/fair: Check idle_cpu() before need_resched() to detect ilb CPU turning busy") +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 32 +++++++------------------------- + 1 file changed, 7 insertions(+), 25 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 0af373c4d7450..ab29666eb50ed 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -10333,22 +10333,9 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) + * When the cpu is attached to null domain for ex, it will not be + * updated. + */ +- if (likely(update_next_balance)) { ++ if (likely(update_next_balance)) + rq->next_balance = next_balance; + +-#ifdef CONFIG_NO_HZ_COMMON +- /* +- * If this CPU has been elected to perform the nohz idle +- * balance. Other idle CPUs have already rebalanced with +- * nohz_idle_balance() and nohz.next_balance has been +- * updated accordingly. This CPU is now running the idle load +- * balance for itself and we need to update the +- * nohz.next_balance accordingly. +- */ +- if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance)) +- nohz.next_balance = rq->next_balance; +-#endif +- } + } + + static inline int on_null_domain(struct rq *rq) +@@ -10674,8 +10661,12 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags, + */ + smp_mb(); + +- for_each_cpu(balance_cpu, nohz.idle_cpus_mask) { +- if (balance_cpu == this_cpu || !idle_cpu(balance_cpu)) ++ /* ++ * Start with the next CPU after this_cpu so we will end with this_cpu and let a ++ * chance for other idle cpu to pull load. 
++ */ ++ for_each_cpu_wrap(balance_cpu, nohz.idle_cpus_mask, this_cpu+1) { ++ if (!idle_cpu(balance_cpu)) + continue; + + /* +@@ -10721,15 +10712,6 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags, + if (likely(update_next_balance)) + nohz.next_balance = next_balance; + +- /* Newly idle CPU doesn't need an update */ +- if (idle != CPU_NEWLY_IDLE) { +- update_blocked_averages(this_cpu); +- has_blocked_load |= this_rq->has_blocked_load; +- } +- +- if (flags & NOHZ_BALANCE_KICK) +- rebalance_domains(this_rq, CPU_IDLE); +- + WRITE_ONCE(nohz.next_blocked, + now + msecs_to_jiffies(LOAD_AVG_PERIOD)); + +-- +2.43.0 + diff --git a/queue-5.10/sched-fair-remove-unused-parameter-of-update_nohz_st.patch b/queue-5.10/sched-fair-remove-unused-parameter-of-update_nohz_st.patch new file mode 100644 index 00000000000..23761059602 --- /dev/null +++ b/queue-5.10/sched-fair-remove-unused-parameter-of-update_nohz_st.patch @@ -0,0 +1,57 @@ +From 326ba88b7561f78c7ba2444e50db1bf108323cef Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 24 Feb 2021 14:30:03 +0100 +Subject: sched/fair: Remove unused parameter of update_nohz_stats + +From: Vincent Guittot + +[ Upstream commit 64f84f273592d17dcdca20244168ad9f525a39c3 ] + +idle load balance is the only user of update_nohz_stats and doesn't use +force parameter. 
Remove it + +Signed-off-by: Vincent Guittot +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Ingo Molnar +Reviewed-by: Valentin Schneider +Link: https://lkml.kernel.org/r/20210224133007.28644-4-vincent.guittot@linaro.org +Stable-dep-of: ff47a0acfcce ("sched/fair: Check idle_cpu() before need_resched() to detect ilb CPU turning busy") +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index a6e34c58cee92..0af373c4d7450 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -8650,7 +8650,7 @@ group_type group_classify(unsigned int imbalance_pct, + return group_has_spare; + } + +-static bool update_nohz_stats(struct rq *rq, bool force) ++static bool update_nohz_stats(struct rq *rq) + { + #ifdef CONFIG_NO_HZ_COMMON + unsigned int cpu = rq->cpu; +@@ -8661,7 +8661,7 @@ static bool update_nohz_stats(struct rq *rq, bool force) + if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask)) + return false; + +- if (!force && !time_after(jiffies, rq->last_blocked_load_update_tick)) ++ if (!time_after(jiffies, rq->last_blocked_load_update_tick)) + return true; + + update_blocked_averages(cpu); +@@ -10690,7 +10690,7 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags, + + rq = cpu_rq(balance_cpu); + +- has_blocked_load |= update_nohz_stats(rq, true); ++ has_blocked_load |= update_nohz_stats(rq); + + /* + * If time for next balance is due, +-- +2.43.0 + diff --git a/queue-5.10/sched-fair-remove-update-of-blocked-load-from-newidl.patch b/queue-5.10/sched-fair-remove-update-of-blocked-load-from-newidl.patch new file mode 100644 index 00000000000..67376158cc5 --- /dev/null +++ b/queue-5.10/sched-fair-remove-update-of-blocked-load-from-newidl.patch @@ -0,0 +1,123 @@ +From 50a650029b8d6b6f26bdffa091b8c3935faeb9c2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 24 Feb 2021 14:30:01 +0100 +Subject: sched/fair: Remove update of 
blocked load from newidle_balance + +From: Vincent Guittot + +[ Upstream commit 0826530de3cbdc89e60a89e86def94a5f0fc81ca ] + +newidle_balance runs with both preempt and irq disabled which prevent +local irq to run during this period. The duration for updating the +blocked load of CPUs varies according to the number of CPU cgroups +with non-decayed load and extends this critical period to an uncontrolled +level. + +Remove the update from newidle_balance and trigger a normal ILB that +will take care of the update instead. + +This reduces the IRQ latency from O(nr_cgroups * nr_nohz_cpus) to +O(nr_cgroups). + +Signed-off-by: Vincent Guittot +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Ingo Molnar +Reviewed-by: Valentin Schneider +Link: https://lkml.kernel.org/r/20210224133007.28644-2-vincent.guittot@linaro.org +Stable-dep-of: ff47a0acfcce ("sched/fair: Check idle_cpu() before need_resched() to detect ilb CPU turning busy") +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 33 +++++---------------------------- + 1 file changed, 5 insertions(+), 28 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index a6a755aec32b5..a6e34c58cee92 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -7621,8 +7621,6 @@ enum migration_type { + #define LBF_NEED_BREAK 0x02 + #define LBF_DST_PINNED 0x04 + #define LBF_SOME_PINNED 0x08 +-#define LBF_NOHZ_STATS 0x10 +-#define LBF_NOHZ_AGAIN 0x20 + + struct lb_env { + struct sched_domain *sd; +@@ -8695,9 +8693,6 @@ static inline void update_sg_lb_stats(struct lb_env *env, + for_each_cpu_and(i, sched_group_span(group), env->cpus) { + struct rq *rq = cpu_rq(i); + +- if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false)) +- env->flags |= LBF_NOHZ_AGAIN; +- + sgs->group_load += cpu_load(rq); + sgs->group_util += cpu_util(i); + sgs->group_runnable += cpu_runnable(rq); +@@ -9230,11 +9225,6 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd + struct 
sg_lb_stats tmp_sgs; + int sg_status = 0; + +-#ifdef CONFIG_NO_HZ_COMMON +- if (env->idle == CPU_NEWLY_IDLE && READ_ONCE(nohz.has_blocked)) +- env->flags |= LBF_NOHZ_STATS; +-#endif +- + do { + struct sg_lb_stats *sgs = &tmp_sgs; + int local_group; +@@ -9271,14 +9261,6 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd + /* Tag domain that child domain prefers tasks go to siblings first */ + sds->prefer_sibling = child && child->flags & SD_PREFER_SIBLING; + +-#ifdef CONFIG_NO_HZ_COMMON +- if ((env->flags & LBF_NOHZ_AGAIN) && +- cpumask_subset(nohz.idle_cpus_mask, sched_domain_span(env->sd))) { +- +- WRITE_ONCE(nohz.next_blocked, +- jiffies + msecs_to_jiffies(LOAD_AVG_PERIOD)); +- } +-#endif + + if (env->sd->flags & SD_NUMA) + env->fbq_type = fbq_classify_group(&sds->busiest_stat); +@@ -10803,16 +10785,11 @@ static void nohz_newidle_balance(struct rq *this_rq) + time_before(jiffies, READ_ONCE(nohz.next_blocked))) + return; + +- raw_spin_unlock(&this_rq->lock); + /* +- * This CPU is going to be idle and blocked load of idle CPUs +- * need to be updated. Run the ilb locally as it is a good +- * candidate for ilb instead of waking up another idle CPU. +- * Kick an normal ilb if we failed to do the update. ++ * Blocked load of idle CPUs need to be updated. ++ * Kick an ILB to update statistics. 
+ */ +- if (!_nohz_idle_balance(this_rq, NOHZ_STATS_KICK, CPU_NEWLY_IDLE)) +- kick_ilb(NOHZ_STATS_KICK); +- raw_spin_lock(&this_rq->lock); ++ kick_ilb(NOHZ_STATS_KICK); + } + + #else /* !CONFIG_NO_HZ_COMMON */ +@@ -10873,8 +10850,6 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) + update_next_balance(sd, &next_balance); + rcu_read_unlock(); + +- nohz_newidle_balance(this_rq); +- + goto out; + } + +@@ -10940,6 +10915,8 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) + + if (pulled_task) + this_rq->idle_stamp = 0; ++ else ++ nohz_newidle_balance(this_rq); + + rq_repin_lock(this_rq, rf); + +-- +2.43.0 + diff --git a/queue-5.10/sched-fair-trigger-the-update-of-blocked-load-on-new.patch b/queue-5.10/sched-fair-trigger-the-update-of-blocked-load-on-new.patch new file mode 100644 index 00000000000..373675abf77 --- /dev/null +++ b/queue-5.10/sched-fair-trigger-the-update-of-blocked-load-on-new.patch @@ -0,0 +1,131 @@ +From 0f5cd7f09da695d3110904e6c147e94988d78848 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 24 Feb 2021 14:30:06 +0100 +Subject: sched/fair: Trigger the update of blocked load on newly idle cpu + +From: Vincent Guittot + +[ Upstream commit c6f886546cb8a38617cdbe755fe50d3acd2463e4 ] + +Instead of waking up a random and already idle CPU, we can take advantage +of this_cpu being about to enter idle to run the ILB and update the +blocked load. 
+ +Signed-off-by: Vincent Guittot +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Ingo Molnar +Reviewed-by: Valentin Schneider +Link: https://lkml.kernel.org/r/20210224133007.28644-7-vincent.guittot@linaro.org +Stable-dep-of: ff47a0acfcce ("sched/fair: Check idle_cpu() before need_resched() to detect ilb CPU turning busy") +Signed-off-by: Sasha Levin +--- + kernel/sched/core.c | 2 +- + kernel/sched/fair.c | 24 +++++++++++++++++++++--- + kernel/sched/idle.c | 6 ++++++ + kernel/sched/sched.h | 7 +++++++ + 4 files changed, 35 insertions(+), 4 deletions(-) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 8e30041cecf94..1f4bf91c27d22 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -746,7 +746,7 @@ static void nohz_csd_func(void *info) + /* + * Release the rq::nohz_csd. + */ +- flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); ++ flags = atomic_fetch_andnot(NOHZ_KICK_MASK | NOHZ_NEWILB_KICK, nohz_flags(cpu)); + WARN_ON(!(flags & NOHZ_KICK_MASK)); + + rq->idle_balance = idle_cpu(cpu); +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index ab29666eb50ed..8121cfd60b8fb 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -10747,6 +10747,24 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) + return true; + } + ++/* ++ * Check if we need to run the ILB for updating blocked load before entering ++ * idle state. ++ */ ++void nohz_run_idle_balance(int cpu) ++{ ++ unsigned int flags; ++ ++ flags = atomic_fetch_andnot(NOHZ_NEWILB_KICK, nohz_flags(cpu)); ++ ++ /* ++ * Update the blocked load only if no SCHED_SOFTIRQ is about to happen ++ * (ie NOHZ_STATS_KICK set) and will do the same. 
++ */ ++ if ((flags == NOHZ_NEWILB_KICK) && !need_resched()) ++ _nohz_idle_balance(cpu_rq(cpu), NOHZ_STATS_KICK, CPU_IDLE); ++} ++ + static void nohz_newidle_balance(struct rq *this_rq) + { + int this_cpu = this_rq->cpu; +@@ -10768,10 +10786,10 @@ static void nohz_newidle_balance(struct rq *this_rq) + return; + + /* +- * Blocked load of idle CPUs need to be updated. +- * Kick an ILB to update statistics. ++ * Set the need to trigger ILB in order to update blocked load ++ * before entering idle state. + */ +- kick_ilb(NOHZ_STATS_KICK); ++ atomic_or(NOHZ_NEWILB_KICK, nohz_flags(this_cpu)); + } + + #else /* !CONFIG_NO_HZ_COMMON */ +diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c +index 2593a733c0849..cdc3e690de714 100644 +--- a/kernel/sched/idle.c ++++ b/kernel/sched/idle.c +@@ -261,6 +261,12 @@ static void cpuidle_idle_call(void) + static void do_idle(void) + { + int cpu = smp_processor_id(); ++ ++ /* ++ * Check if we need to update blocked load ++ */ ++ nohz_run_idle_balance(cpu); ++ + /* + * If the arch has a polling bit, we maintain an invariant: + * +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index df6cf8aa59f89..66e3ecb7c10e4 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -2292,9 +2292,11 @@ extern void cfs_bandwidth_usage_dec(void); + #ifdef CONFIG_NO_HZ_COMMON + #define NOHZ_BALANCE_KICK_BIT 0 + #define NOHZ_STATS_KICK_BIT 1 ++#define NOHZ_NEWILB_KICK_BIT 2 + + #define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT) + #define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT) ++#define NOHZ_NEWILB_KICK BIT(NOHZ_NEWILB_KICK_BIT) + + #define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK) + +@@ -2305,6 +2307,11 @@ extern void nohz_balance_exit_idle(struct rq *rq); + static inline void nohz_balance_exit_idle(struct rq *rq) { } + #endif + ++#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) ++extern void nohz_run_idle_balance(int cpu); ++#else ++static inline void nohz_run_idle_balance(int cpu) { } ++#endif + + #ifdef 
CONFIG_SMP + static inline +-- +2.43.0 + diff --git a/queue-5.10/series b/queue-5.10/series index c77d8457fdf..9c9335219e7 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -432,3 +432,12 @@ usb-chipidea-udc-handle-usb-error-interrupt-if-ioc-n.patch mips-loongson64-dts-really-fix-pcie-port-nodes-for-l.patch powerpc-prom_init-fixup-missing-powermac-size-cells.patch misc-eeprom-eeprom_93cx6-add-quirk-for-extra-read-cl.patch +sched-core-remove-the-unnecessary-need_resched-check.patch +sched-fair-remove-update-of-blocked-load-from-newidl.patch +sched-fair-remove-unused-parameter-of-update_nohz_st.patch +sched-fair-merge-for-each-idle-cpu-loop-of-ilb.patch +sched-fair-trigger-the-update-of-blocked-load-on-new.patch +sched-fair-add-nohz-balancer-flag-for-nohz.next_bala.patch +sched-fair-check-idle_cpu-before-need_resched-to-det.patch +sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch +btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch