git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.10
author    Sasha Levin <sashal@kernel.org>
Thu, 12 Dec 2024 01:10:55 +0000 (20:10 -0500)
committer Sasha Levin <sashal@kernel.org>
Thu, 12 Dec 2024 01:10:55 +0000 (20:10 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-5.10/btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch [new file with mode: 0644]
queue-5.10/sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch [new file with mode: 0644]
queue-5.10/sched-core-remove-the-unnecessary-need_resched-check.patch [new file with mode: 0644]
queue-5.10/sched-fair-add-nohz-balancer-flag-for-nohz.next_bala.patch [new file with mode: 0644]
queue-5.10/sched-fair-check-idle_cpu-before-need_resched-to-det.patch [new file with mode: 0644]
queue-5.10/sched-fair-merge-for-each-idle-cpu-loop-of-ilb.patch [new file with mode: 0644]
queue-5.10/sched-fair-remove-unused-parameter-of-update_nohz_st.patch [new file with mode: 0644]
queue-5.10/sched-fair-remove-update-of-blocked-load-from-newidl.patch [new file with mode: 0644]
queue-5.10/sched-fair-trigger-the-update-of-blocked-load-on-new.patch [new file with mode: 0644]
queue-5.10/series

diff --git a/queue-5.10/btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch b/queue-5.10/btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch
new file mode 100644 (file)
index 0000000..fbd7a37
--- /dev/null
@@ -0,0 +1,41 @@
+From 9f81587b787fcc2527b251a16f67e9581b852263 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Nov 2024 13:33:03 +0000
+Subject: btrfs: fix missing snapshot drew unlock when root is dead during swap
+ activation
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 9c803c474c6c002d8ade68ebe99026cc39c37f85 ]
+
+When activating a swap file we acquire the root's snapshot drew lock and
+then check if the root is dead, failing and returning with -EPERM if it's
+dead but without unlocking the root's snapshot lock. Fix this by adding
+the missing unlock.
+
+Fixes: 60021bd754c6 ("btrfs: prevent subvol with swapfile from being deleted")
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/inode.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 24598acb9a314..eba87f2936d2c 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -10283,6 +10283,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
+       if (btrfs_root_dead(root)) {
+               spin_unlock(&root->root_item_lock);
++              btrfs_drew_write_unlock(&root->snapshot_lock);
+               btrfs_exclop_finish(fs_info);
+               btrfs_warn(fs_info,
+               "cannot activate swapfile because subvolume %llu is being deleted",
+-- 
+2.43.0
+
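
For context, the bug class fixed above is an early-return error path that releases only some of the locks taken before the check. Below is a minimal userspace C sketch of that pattern (illustrative only, not btrfs code; snapshot_lock, item_lock and root_dead are stand-ins for the root's snapshot drew lock, root_item_lock and btrfs_root_dead()):

  #include <pthread.h>
  #include <stdbool.h>
  #include <stdio.h>

  static pthread_rwlock_t snapshot_lock = PTHREAD_RWLOCK_INITIALIZER;
  static pthread_mutex_t  item_lock     = PTHREAD_MUTEX_INITIALIZER;
  static bool root_dead = true;           /* simulate a subvolume being deleted */

  static int activate_swapfile(void)
  {
          pthread_rwlock_wrlock(&snapshot_lock);  /* "drew" write lock */
          pthread_mutex_lock(&item_lock);

          if (root_dead) {
                  pthread_mutex_unlock(&item_lock);
                  /* The fix above adds the equivalent of this unlock. */
                  pthread_rwlock_unlock(&snapshot_lock);
                  fprintf(stderr, "subvolume is being deleted\n");
                  return -1;
          }

          pthread_mutex_unlock(&item_lock);
          /* ... activation work ... */
          pthread_rwlock_unlock(&snapshot_lock);
          return 0;
  }

  int main(void)
  {
          return activate_swapfile() ? 1 : 0;
  }
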
diff --git a/queue-5.10/sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch b/queue-5.10/sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch
new file mode 100644 (file)
index 0000000..0b4c2b4
--- /dev/null
@@ -0,0 +1,71 @@
+From 96b450db25e41072ecaa1219b75be13cf1c7ace9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Nov 2024 05:44:32 +0000
+Subject: sched/core: Prevent wakeup of ksoftirqd during idle load balance
+
+From: K Prateek Nayak <kprateek.nayak@amd.com>
+
+[ Upstream commit e932c4ab38f072ce5894b2851fea8bc5754bb8e5 ]
+
+The scheduler raises a SCHED_SOFTIRQ to trigger a load balancing event
+from the IPI handler on the idle CPU. If the SMP function is invoked
+from an idle CPU via flush_smp_call_function_queue() then the HARD-IRQ
+flag is not set and raise_softirq_irqoff() needlessly wakes ksoftirqd
+because soft interrupts are handled before ksoftirqd gets on the CPU.
+
+Adding a trace_printk() in nohz_csd_func() at the spot of raising
+SCHED_SOFTIRQ and enabling trace events for sched_switch, sched_wakeup,
+and softirq_entry (for the SCHED_SOFTIRQ vector alone) helps observe
+the current behavior:
+
+       <idle>-0   [000] dN.1.:  nohz_csd_func: Raising SCHED_SOFTIRQ from nohz_csd_func
+       <idle>-0   [000] dN.4.:  sched_wakeup: comm=ksoftirqd/0 pid=16 prio=120 target_cpu=000
+       <idle>-0   [000] .Ns1.:  softirq_entry: vec=7 [action=SCHED]
+       <idle>-0   [000] .Ns1.:  softirq_exit: vec=7  [action=SCHED]
+       <idle>-0   [000] d..2.:  sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=ksoftirqd/0 next_pid=16 next_prio=120
+  ksoftirqd/0-16  [000] d..2.:  sched_switch: prev_comm=ksoftirqd/0 prev_pid=16 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120
+       ...
+
+Use __raise_softirq_irqoff() to raise the softirq. The SMP function call
+is always invoked on the requested CPU in an interrupt handler. It is
+guaranteed that soft interrupts are handled at the end.
+
+Following are the observations with the changes when enabling the same
+set of events:
+
+       <idle>-0       [000] dN.1.: nohz_csd_func: Raising SCHED_SOFTIRQ for nohz_idle_balance
+       <idle>-0       [000] dN.1.: softirq_raise: vec=7 [action=SCHED]
+       <idle>-0       [000] .Ns1.: softirq_entry: vec=7 [action=SCHED]
+
+No unnecessary ksoftirqd wakeups are seen from idle task's context to
+service the softirq.
+
+Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()")
+Closes: https://lore.kernel.org/lkml/fcf823f-195e-6c9a-eac3-25f870cb35ac@inria.fr/ [1]
+Reported-by: Julia Lawall <julia.lawall@inria.fr>
+Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20241119054432.6405-5-kprateek.nayak@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 1f4bf91c27d22..7cf45d506688c 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -752,7 +752,7 @@ static void nohz_csd_func(void *info)
+       rq->idle_balance = idle_cpu(cpu);
+       if (rq->idle_balance) {
+               rq->nohz_idle_balance = flags;
+-              raise_softirq_irqoff(SCHED_SOFTIRQ);
++              __raise_softirq_irqoff(SCHED_SOFTIRQ);
+       }
+ }
+-- 
+2.43.0
+
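
The behavioural difference this patch relies on can be summarised in a small userspace sketch (an assumption-laden illustration, not the kernel implementation: the plain raise variant wakes ksoftirqd when it is not called from hard-IRQ context, while the double-underscore variant only marks the softirq pending):

  #include <stdbool.h>
  #include <stdio.h>

  static unsigned int pending;
  static bool in_hardirq;   /* false when running the SMP call from idle */

  static void __raise_softirq_sketch(unsigned int nr)
  {
          pending |= 1U << nr;              /* only mark the vector pending */
  }

  static void raise_softirq_sketch(unsigned int nr)
  {
          __raise_softirq_sketch(nr);
          if (!in_hardirq)                  /* HARD-IRQ flag not set ...    */
                  printf("waking ksoftirqd\n"); /* ... so ksoftirqd is woken */
  }

  int main(void)
  {
          raise_softirq_sketch(7);          /* old behaviour: spurious wakeup  */
          __raise_softirq_sketch(7);        /* new behaviour: pending bit only */
          printf("pending mask: 0x%x\n", pending);
          return 0;
  }
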
diff --git a/queue-5.10/sched-core-remove-the-unnecessary-need_resched-check.patch b/queue-5.10/sched-core-remove-the-unnecessary-need_resched-check.patch
new file mode 100644 (file)
index 0000000..9c91214
--- /dev/null
@@ -0,0 +1,122 @@
+From c6729d6eac04e2570b1043dd8322be448456c93a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Nov 2024 05:44:30 +0000
+Subject: sched/core: Remove the unnecessary need_resched() check in
+ nohz_csd_func()
+
+From: K Prateek Nayak <kprateek.nayak@amd.com>
+
+[ Upstream commit ea9cffc0a154124821531991d5afdd7e8b20d7aa ]
+
+The need_resched() check currently in nohz_csd_func() can be traced
+back to its addition in scheduler_ipi() in 2011 via commit
+ca38062e57e9 ("sched: Use resched IPI to kick off the nohz idle balance").
+
+Since then it has travelled quite a bit, but it seems that an idle_cpu()
+check is now sufficient to detect the need to bail out from an
+idle load balance. To justify this removal, consider all of the following
+cases where an idle load balance could race with a task wakeup:
+
+o Since commit f3dd3f674555b ("sched: Remove the limitation of WF_ON_CPU
+  on wakelist if wakee cpu is idle") a target perceived to be idle
+  (target_rq->nr_running == 0) will return true for
+  ttwu_queue_cond(target) which will offload the task wakeup to the idle
+  target via an IPI.
+
+  In all such cases target_rq->ttwu_pending will be set to 1 before
+  queuing the wake function.
+
+  If an idle load balance races here, following scenarios are possible:
+
+  - The CPU is not in TIF_POLLING_NRFLAG mode in which case an actual
+    IPI is sent to the CPU to wake it out of idle. If the
+    nohz_csd_func() queues before sched_ttwu_pending(), the idle load
+    balance will bail out since idle_cpu(target) returns 0 since
+    target_rq->ttwu_pending is 1. If the nohz_csd_func() is queued after
+    sched_ttwu_pending() it should see rq->nr_running to be non-zero and
+    bail out of idle load balancing.
+
+  - The CPU is in TIF_POLLING_NRFLAG mode and instead of an actual IPI,
+    the sender will simply set TIF_NEED_RESCHED for the target to put it
+    out of idle and flush_smp_call_function_queue() in do_idle() will
+    execute the call function. Depending on the ordering of the queuing
+    of nohz_csd_func() and sched_ttwu_pending(), the idle_cpu() check in
+    nohz_csd_func() should either see target_rq->ttwu_pending = 1 or
+    target_rq->nr_running to be non-zero if there is a genuine task
+    wakeup racing with the idle load balance kick.
+
+o The waker CPU perceives the target CPU to be busy
+  (target_rq->nr_running != 0) but the CPU is in fact going idle and due
+  to a series of unfortunate events, the system reaches a case where the
+  waker CPU decides to perform the wakeup by itself in ttwu_queue() on
+  the target CPU but target is concurrently selected for idle load
+  balance (XXX: Can this happen? I'm not sure, but we'll consider the
+  mother of all coincidences to estimate the worst case scenario).
+
+  ttwu_do_activate() calls enqueue_task(), which increments
+  "rq->nr_running", and then calls wakeup_preempt(), which is
+  responsible for setting TIF_NEED_RESCHED (via a resched IPI or by
+  setting TIF_NEED_RESCHED on a TIF_POLLING_NRFLAG idle CPU). The key
+  thing to note in this case is that rq->nr_running is already non-zero
+  in case of a wakeup before TIF_NEED_RESCHED is set, which would
+  lead to the idle_cpu() check returning false.
+
+In all cases, it seems that the need_resched() check is unnecessary when
+idle_cpu() is checked first, since an impending wakeup racing with the
+idle load balancer will either set "rq->ttwu_pending" or indicate a newly
+woken task via "rq->nr_running".
+
+Chasing the reason why this check might have existed in the first place,
+I came across Peter's suggestion on the first iteration of Suresh's
+patch from 2011 [1], where the condition to raise the SCHED_SOFTIRQ was:
+
+       sched_ttwu_do_pending(list);
+
+       if (unlikely((rq->idle == current) &&
+           rq->nohz_balance_kick &&
+           !need_resched()))
+               raise_softirq_irqoff(SCHED_SOFTIRQ);
+
+Since the condition to raise the SCHED_SOFTIRQ was preceded by
+sched_ttwu_do_pending() (the equivalent of sched_ttwu_pending() in
+the current upstream kernel), the need_resched() check was necessary to
+catch a newly queued task. Peter suggested modifying it to:
+
+       if (idle_cpu() && rq->nohz_balance_kick && !need_resched())
+               raise_softirq_irqoff(SCHED_SOFTIRQ);
+
+where idle_cpu() seems to have replaced "rq->idle == current" check.
+
+Even back then, the idle_cpu() check would have been sufficient to catch
+a new task being enqueued. Since commit b2a02fc43a1f ("smp: Optimize
+send_call_function_single_ipi()") overloads the interpretation of
+TIF_NEED_RESCHED for TIF_POLLING_NRFLAG idling, remove the
+need_resched() check in nohz_csd_func() to raise SCHED_SOFTIRQ based
+on Peter's suggestion.
+
+Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()")
+Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20241119054432.6405-3-kprateek.nayak@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 29d8fc3a7bbd2..8e30041cecf94 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -750,7 +750,7 @@ static void nohz_csd_func(void *info)
+       WARN_ON(!(flags & NOHZ_KICK_MASK));
+       rq->idle_balance = idle_cpu(cpu);
+-      if (rq->idle_balance && !need_resched()) {
++      if (rq->idle_balance) {
+               rq->nohz_idle_balance = flags;
+               raise_softirq_irqoff(SCHED_SOFTIRQ);
+       }
+-- 
+2.43.0
+
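
The argument above boils down to: by the time a racing wakeup matters, either rq->ttwu_pending or rq->nr_running is already non-zero, so an idle_cpu()-style test alone is enough to bail out. A hedged userspace illustration (struct rq here is a two-field stand-in, not the kernel's):

  #include <stdbool.h>
  #include <stdio.h>

  struct rq { int nr_running; int ttwu_pending; };

  /* Rough equivalent of the idle_cpu() test the commit relies on. */
  static bool cpu_looks_idle(const struct rq *rq)
  {
          return rq->nr_running == 0 && rq->ttwu_pending == 0;
  }

  int main(void)
  {
          struct rq rq = { .nr_running = 0, .ttwu_pending = 0 };

          printf("idle: %d\n", cpu_looks_idle(&rq)); /* 1: balancing may proceed  */
          rq.ttwu_pending = 1;                       /* a remote wakeup was queued */
          printf("idle: %d\n", cpu_looks_idle(&rq)); /* 0: balancing bails out     */
          return 0;
  }
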
diff --git a/queue-5.10/sched-fair-add-nohz-balancer-flag-for-nohz.next_bala.patch b/queue-5.10/sched-fair-add-nohz-balancer-flag-for-nohz.next_bala.patch
new file mode 100644 (file)
index 0000000..fc30ad8
--- /dev/null
@@ -0,0 +1,147 @@
+From 5cd103305979a38803302ff2901d488f34ad2eb1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 23 Aug 2021 12:16:59 +0100
+Subject: sched/fair: Add NOHZ balancer flag for nohz.next_balance updates
+
+From: Valentin Schneider <valentin.schneider@arm.com>
+
+[ Upstream commit efd984c481abb516fab8bafb25bf41fd9397a43c ]
+
+A following patch will trigger NOHZ idle balances as a means to update
+nohz.next_balance. Vincent noted that blocked load updates can have
+non-negligible overhead, which should be avoided if the intent is to only
+update nohz.next_balance.
+
+Add a new NOHZ balance kick flag, NOHZ_NEXT_KICK. Gate NOHZ blocked load
+update by the presence of NOHZ_STATS_KICK - currently all NOHZ balance
+kicks will have the NOHZ_STATS_KICK flag set, so no change in behaviour is
+expected.
+
+Suggested-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
+Link: https://lkml.kernel.org/r/20210823111700.2842997-2-valentin.schneider@arm.com
+Stable-dep-of: ff47a0acfcce ("sched/fair: Check idle_cpu() before need_resched() to detect ilb CPU turning busy")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c  | 24 ++++++++++++++----------
+ kernel/sched/sched.h |  8 +++++++-
+ 2 files changed, 21 insertions(+), 11 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 8121cfd60b8fb..e2116e3d593ec 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -10438,7 +10438,7 @@ static void nohz_balancer_kick(struct rq *rq)
+               goto out;
+       if (rq->nr_running >= 2) {
+-              flags = NOHZ_KICK_MASK;
++              flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
+               goto out;
+       }
+@@ -10452,7 +10452,7 @@ static void nohz_balancer_kick(struct rq *rq)
+                * on.
+                */
+               if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) {
+-                      flags = NOHZ_KICK_MASK;
++                      flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
+                       goto unlock;
+               }
+       }
+@@ -10466,7 +10466,7 @@ static void nohz_balancer_kick(struct rq *rq)
+                */
+               for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
+                       if (sched_asym_prefer(i, cpu)) {
+-                              flags = NOHZ_KICK_MASK;
++                              flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
+                               goto unlock;
+                       }
+               }
+@@ -10479,7 +10479,7 @@ static void nohz_balancer_kick(struct rq *rq)
+                * to run the misfit task on.
+                */
+               if (check_misfit_status(rq, sd)) {
+-                      flags = NOHZ_KICK_MASK;
++                      flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
+                       goto unlock;
+               }
+@@ -10506,7 +10506,7 @@ static void nohz_balancer_kick(struct rq *rq)
+                */
+               nr_busy = atomic_read(&sds->nr_busy_cpus);
+               if (nr_busy > 1) {
+-                      flags = NOHZ_KICK_MASK;
++                      flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
+                       goto unlock;
+               }
+       }
+@@ -10653,7 +10653,8 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
+        * setting the flag, we are sure to not clear the state and not
+        * check the load of an idle cpu.
+        */
+-      WRITE_ONCE(nohz.has_blocked, 0);
++      if (flags & NOHZ_STATS_KICK)
++              WRITE_ONCE(nohz.has_blocked, 0);
+       /*
+        * Ensures that if we miss the CPU, we must see the has_blocked
+@@ -10675,13 +10676,15 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
+                * balancing owner will pick it up.
+                */
+               if (need_resched()) {
+-                      has_blocked_load = true;
++                      if (flags & NOHZ_STATS_KICK)
++                              has_blocked_load = true;
+                       goto abort;
+               }
+               rq = cpu_rq(balance_cpu);
+-              has_blocked_load |= update_nohz_stats(rq);
++              if (flags & NOHZ_STATS_KICK)
++                      has_blocked_load |= update_nohz_stats(rq);
+               /*
+                * If time for next balance is due,
+@@ -10712,8 +10715,9 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
+       if (likely(update_next_balance))
+               nohz.next_balance = next_balance;
+-      WRITE_ONCE(nohz.next_blocked,
+-              now + msecs_to_jiffies(LOAD_AVG_PERIOD));
++      if (flags & NOHZ_STATS_KICK)
++              WRITE_ONCE(nohz.next_blocked,
++                         now + msecs_to_jiffies(LOAD_AVG_PERIOD));
+       /* The full idle balance loop has been done */
+       ret = true;
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 66e3ecb7c10e4..5f17507bd66b8 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -2293,12 +2293,18 @@ extern void cfs_bandwidth_usage_dec(void);
+ #define NOHZ_BALANCE_KICK_BIT 0
+ #define NOHZ_STATS_KICK_BIT   1
+ #define NOHZ_NEWILB_KICK_BIT  2
++#define NOHZ_NEXT_KICK_BIT    3
++/* Run rebalance_domains() */
+ #define NOHZ_BALANCE_KICK     BIT(NOHZ_BALANCE_KICK_BIT)
++/* Update blocked load */
+ #define NOHZ_STATS_KICK               BIT(NOHZ_STATS_KICK_BIT)
++/* Update blocked load when entering idle */
+ #define NOHZ_NEWILB_KICK      BIT(NOHZ_NEWILB_KICK_BIT)
++/* Update nohz.next_balance */
++#define NOHZ_NEXT_KICK                BIT(NOHZ_NEXT_KICK_BIT)
+-#define NOHZ_KICK_MASK        (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)
++#define NOHZ_KICK_MASK        (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK | NOHZ_NEXT_KICK)
+ #define nohz_flags(cpu)       (&cpu_rq(cpu)->nohz_flags)
+-- 
+2.43.0
+
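
The mechanics of the new flag are plain bit masking; a small sketch (flag names taken from the patch, everything else illustrative) of how a kick without NOHZ_STATS_KICK skips the blocked-load update while a regular kick still performs it:

  #include <stdio.h>

  #define BIT(n)             (1U << (n))
  #define NOHZ_BALANCE_KICK  BIT(0)  /* run rebalance_domains()        */
  #define NOHZ_STATS_KICK    BIT(1)  /* update blocked load            */
  #define NOHZ_NEXT_KICK     BIT(3)  /* only refresh nohz.next_balance */
  #define NOHZ_KICK_MASK     (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK | NOHZ_NEXT_KICK)

  static void ilb_sketch(unsigned int flags)
  {
          if (flags & NOHZ_STATS_KICK)
                  printf("updating blocked load\n");
          else
                  printf("skipping blocked-load update\n");
          printf("refreshing nohz.next_balance\n");
  }

  int main(void)
  {
          ilb_sketch(NOHZ_STATS_KICK | NOHZ_BALANCE_KICK); /* regular kick */
          ilb_sketch(NOHZ_NEXT_KICK);                      /* cheap kick   */
          return 0;
  }
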
diff --git a/queue-5.10/sched-fair-check-idle_cpu-before-need_resched-to-det.patch b/queue-5.10/sched-fair-check-idle_cpu-before-need_resched-to-det.patch
new file mode 100644 (file)
index 0000000..f9bad56
--- /dev/null
@@ -0,0 +1,60 @@
+From 5f41ec38490ab87b8f4281161d5ab93e32a3baeb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Nov 2024 05:44:31 +0000
+Subject: sched/fair: Check idle_cpu() before need_resched() to detect ilb CPU
+ turning busy
+
+From: K Prateek Nayak <kprateek.nayak@amd.com>
+
+[ Upstream commit ff47a0acfcce309cf9e175149c75614491953c8f ]
+
+Commit b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()")
+optimizes IPIs to idle CPUs in TIF_POLLING_NRFLAG mode by setting the
+TIF_NEED_RESCHED flag in idle task's thread info and relying on
+flush_smp_call_function_queue() in idle exit path to run the
+call-function. A softirq raised by the call-function is handled shortly
+after in do_softirq_post_smp_call_flush() but the TIF_NEED_RESCHED flag
+remains set and is only cleared later when schedule_idle() calls
+__schedule().
+
+The need_resched() check in _nohz_idle_balance() exists to bail out of load
+balancing if another task has woken up on the CPU currently in charge of
+idle load balancing, which is being processed in SCHED_SOFTIRQ context.
+Since the optimization mentioned above overloads the interpretation of
+TIF_NEED_RESCHED, check for idle_cpu() before going with the existing
+need_resched() check which can catch a genuine task wakeup on an idle
+CPU processing SCHED_SOFTIRQ from do_softirq_post_smp_call_flush(), as
+well as the case where ksoftirqd needs to be preempted as a result of
+new task wakeup or slice expiry.
+
+In case of PREEMPT_RT or threadirqs, although the idle load balancing
+may be inhibited in some cases on the ilb CPU, the fact that ksoftirqd
+is the only fair task going back to sleep will trigger a newidle balance
+on the CPU, which will alleviate some imbalance, if it exists, when the
+idle load balance fails to do so.
+
+Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()")
+Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20241119054432.6405-4-kprateek.nayak@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index e2116e3d593ec..9f8cb265589b3 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -10675,7 +10675,7 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
+                * work being done for other CPUs. Next load
+                * balancing owner will pick it up.
+                */
+-              if (need_resched()) {
++              if (!idle_cpu(this_cpu) && need_resched()) {
+                       if (flags & NOHZ_STATS_KICK)
+                               has_blocked_load = true;
+                       goto abort;
+-- 
+2.43.0
+
diff --git a/queue-5.10/sched-fair-merge-for-each-idle-cpu-loop-of-ilb.patch b/queue-5.10/sched-fair-merge-for-each-idle-cpu-loop-of-ilb.patch
new file mode 100644 (file)
index 0000000..9a15e4f
--- /dev/null
@@ -0,0 +1,91 @@
+From 2c353fb77d9fe2fbdb486d7ae22236f70374e129 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Feb 2021 14:30:04 +0100
+Subject: sched/fair: Merge for each idle cpu loop of ILB
+
+From: Vincent Guittot <vincent.guittot@linaro.org>
+
+[ Upstream commit 7a82e5f52a3506bc35a4dc04d53ad2c9daf82e7f ]
+
+Remove the specific case for handling this_cpu outside the for_each_cpu()
+loop when running the ILB. Instead we use for_each_cpu_wrap() and start
+with the next cpu after this_cpu so that we still finish with this_cpu.
+
+update_nohz_stats() is now used for this_cpu too and will prevent
+unnecessary updates. We don't need a special case for handling the update
+of nohz.next_balance for this_cpu anymore because it is now handled by
+the loop like the others.
+
+Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
+Link: https://lkml.kernel.org/r/20210224133007.28644-5-vincent.guittot@linaro.org
+Stable-dep-of: ff47a0acfcce ("sched/fair: Check idle_cpu() before need_resched() to detect ilb CPU turning busy")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 32 +++++++-------------------------
+ 1 file changed, 7 insertions(+), 25 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 0af373c4d7450..ab29666eb50ed 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -10333,22 +10333,9 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
+        * When the cpu is attached to null domain for ex, it will not be
+        * updated.
+        */
+-      if (likely(update_next_balance)) {
++      if (likely(update_next_balance))
+               rq->next_balance = next_balance;
+-#ifdef CONFIG_NO_HZ_COMMON
+-              /*
+-               * If this CPU has been elected to perform the nohz idle
+-               * balance. Other idle CPUs have already rebalanced with
+-               * nohz_idle_balance() and nohz.next_balance has been
+-               * updated accordingly. This CPU is now running the idle load
+-               * balance for itself and we need to update the
+-               * nohz.next_balance accordingly.
+-               */
+-              if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance))
+-                      nohz.next_balance = rq->next_balance;
+-#endif
+-      }
+ }
+ static inline int on_null_domain(struct rq *rq)
+@@ -10674,8 +10661,12 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
+        */
+       smp_mb();
+-      for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
+-              if (balance_cpu == this_cpu || !idle_cpu(balance_cpu))
++      /*
++       * Start with the next CPU after this_cpu so we will end with this_cpu and let a
++       * chance for other idle cpu to pull load.
++       */
++      for_each_cpu_wrap(balance_cpu,  nohz.idle_cpus_mask, this_cpu+1) {
++              if (!idle_cpu(balance_cpu))
+                       continue;
+               /*
+@@ -10721,15 +10712,6 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
+       if (likely(update_next_balance))
+               nohz.next_balance = next_balance;
+-      /* Newly idle CPU doesn't need an update */
+-      if (idle != CPU_NEWLY_IDLE) {
+-              update_blocked_averages(this_cpu);
+-              has_blocked_load |= this_rq->has_blocked_load;
+-      }
+-
+-      if (flags & NOHZ_BALANCE_KICK)
+-              rebalance_domains(this_rq, CPU_IDLE);
+-
+       WRITE_ONCE(nohz.next_blocked,
+               now + msecs_to_jiffies(LOAD_AVG_PERIOD));
+-- 
+2.43.0
+
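
The loop change amounts to visiting the idle CPUs in wrapped order so that this_cpu comes last; a standalone sketch of that iteration order (for_each_cpu_wrap() itself is a kernel cpumask helper, modelled here with a plain modulo loop over NR_CPUS):

  #include <stdio.h>

  #define NR_CPUS 8

  int main(void)
  {
          int this_cpu = 2;

          /* Start just after this_cpu and wrap, so this_cpu is handled last
           * and other idle CPUs get a chance to pull load first. */
          for (int i = 1; i <= NR_CPUS; i++) {
                  int balance_cpu = (this_cpu + i) % NR_CPUS;
                  printf("consider cpu %d\n", balance_cpu);  /* 3 4 5 6 7 0 1 2 */
          }
          return 0;
  }
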
diff --git a/queue-5.10/sched-fair-remove-unused-parameter-of-update_nohz_st.patch b/queue-5.10/sched-fair-remove-unused-parameter-of-update_nohz_st.patch
new file mode 100644 (file)
index 0000000..2376105
--- /dev/null
@@ -0,0 +1,57 @@
+From 326ba88b7561f78c7ba2444e50db1bf108323cef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Feb 2021 14:30:03 +0100
+Subject: sched/fair: Remove unused parameter of update_nohz_stats
+
+From: Vincent Guittot <vincent.guittot@linaro.org>
+
+[ Upstream commit 64f84f273592d17dcdca20244168ad9f525a39c3 ]
+
+The idle load balance is the only user of update_nohz_stats() and doesn't
+use the force parameter. Remove it.
+
+Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
+Link: https://lkml.kernel.org/r/20210224133007.28644-4-vincent.guittot@linaro.org
+Stable-dep-of: ff47a0acfcce ("sched/fair: Check idle_cpu() before need_resched() to detect ilb CPU turning busy")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index a6e34c58cee92..0af373c4d7450 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -8650,7 +8650,7 @@ group_type group_classify(unsigned int imbalance_pct,
+       return group_has_spare;
+ }
+-static bool update_nohz_stats(struct rq *rq, bool force)
++static bool update_nohz_stats(struct rq *rq)
+ {
+ #ifdef CONFIG_NO_HZ_COMMON
+       unsigned int cpu = rq->cpu;
+@@ -8661,7 +8661,7 @@ static bool update_nohz_stats(struct rq *rq, bool force)
+       if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
+               return false;
+-      if (!force && !time_after(jiffies, rq->last_blocked_load_update_tick))
++      if (!time_after(jiffies, rq->last_blocked_load_update_tick))
+               return true;
+       update_blocked_averages(cpu);
+@@ -10690,7 +10690,7 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
+               rq = cpu_rq(balance_cpu);
+-              has_blocked_load |= update_nohz_stats(rq, true);
++              has_blocked_load |= update_nohz_stats(rq);
+               /*
+                * If time for next balance is due,
+-- 
+2.43.0
+
diff --git a/queue-5.10/sched-fair-remove-update-of-blocked-load-from-newidl.patch b/queue-5.10/sched-fair-remove-update-of-blocked-load-from-newidl.patch
new file mode 100644 (file)
index 0000000..6737615
--- /dev/null
@@ -0,0 +1,123 @@
+From 50a650029b8d6b6f26bdffa091b8c3935faeb9c2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Feb 2021 14:30:01 +0100
+Subject: sched/fair: Remove update of blocked load from newidle_balance
+
+From: Vincent Guittot <vincent.guittot@linaro.org>
+
+[ Upstream commit 0826530de3cbdc89e60a89e86def94a5f0fc81ca ]
+
+newidle_balance() runs with both preemption and IRQs disabled, which
+prevents local IRQs from running during this period. The time spent
+updating the blocked load of the CPUs varies with the number of CPU
+cgroups carrying non-decayed load and extends this critical period to
+an uncontrolled level.
+
+Remove the update from newidle_balance and trigger a normal ILB that
+will take care of the update instead.
+
+This reduces the IRQ latency from O(nr_cgroups * nr_nohz_cpus) to
+O(nr_cgroups).
+
+Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
+Link: https://lkml.kernel.org/r/20210224133007.28644-2-vincent.guittot@linaro.org
+Stable-dep-of: ff47a0acfcce ("sched/fair: Check idle_cpu() before need_resched() to detect ilb CPU turning busy")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 33 +++++----------------------------
+ 1 file changed, 5 insertions(+), 28 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index a6a755aec32b5..a6e34c58cee92 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -7621,8 +7621,6 @@ enum migration_type {
+ #define LBF_NEED_BREAK        0x02
+ #define LBF_DST_PINNED  0x04
+ #define LBF_SOME_PINNED       0x08
+-#define LBF_NOHZ_STATS        0x10
+-#define LBF_NOHZ_AGAIN        0x20
+ struct lb_env {
+       struct sched_domain     *sd;
+@@ -8695,9 +8693,6 @@ static inline void update_sg_lb_stats(struct lb_env *env,
+       for_each_cpu_and(i, sched_group_span(group), env->cpus) {
+               struct rq *rq = cpu_rq(i);
+-              if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false))
+-                      env->flags |= LBF_NOHZ_AGAIN;
+-
+               sgs->group_load += cpu_load(rq);
+               sgs->group_util += cpu_util(i);
+               sgs->group_runnable += cpu_runnable(rq);
+@@ -9230,11 +9225,6 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
+       struct sg_lb_stats tmp_sgs;
+       int sg_status = 0;
+-#ifdef CONFIG_NO_HZ_COMMON
+-      if (env->idle == CPU_NEWLY_IDLE && READ_ONCE(nohz.has_blocked))
+-              env->flags |= LBF_NOHZ_STATS;
+-#endif
+-
+       do {
+               struct sg_lb_stats *sgs = &tmp_sgs;
+               int local_group;
+@@ -9271,14 +9261,6 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
+       /* Tag domain that child domain prefers tasks go to siblings first */
+       sds->prefer_sibling = child && child->flags & SD_PREFER_SIBLING;
+-#ifdef CONFIG_NO_HZ_COMMON
+-      if ((env->flags & LBF_NOHZ_AGAIN) &&
+-          cpumask_subset(nohz.idle_cpus_mask, sched_domain_span(env->sd))) {
+-
+-              WRITE_ONCE(nohz.next_blocked,
+-                         jiffies + msecs_to_jiffies(LOAD_AVG_PERIOD));
+-      }
+-#endif
+       if (env->sd->flags & SD_NUMA)
+               env->fbq_type = fbq_classify_group(&sds->busiest_stat);
+@@ -10803,16 +10785,11 @@ static void nohz_newidle_balance(struct rq *this_rq)
+           time_before(jiffies, READ_ONCE(nohz.next_blocked)))
+               return;
+-      raw_spin_unlock(&this_rq->lock);
+       /*
+-       * This CPU is going to be idle and blocked load of idle CPUs
+-       * need to be updated. Run the ilb locally as it is a good
+-       * candidate for ilb instead of waking up another idle CPU.
+-       * Kick an normal ilb if we failed to do the update.
++       * Blocked load of idle CPUs need to be updated.
++       * Kick an ILB to update statistics.
+        */
+-      if (!_nohz_idle_balance(this_rq, NOHZ_STATS_KICK, CPU_NEWLY_IDLE))
+-              kick_ilb(NOHZ_STATS_KICK);
+-      raw_spin_lock(&this_rq->lock);
++      kick_ilb(NOHZ_STATS_KICK);
+ }
+ #else /* !CONFIG_NO_HZ_COMMON */
+@@ -10873,8 +10850,6 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
+                       update_next_balance(sd, &next_balance);
+               rcu_read_unlock();
+-              nohz_newidle_balance(this_rq);
+-
+               goto out;
+       }
+@@ -10940,6 +10915,8 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
+       if (pulled_task)
+               this_rq->idle_stamp = 0;
++      else
++              nohz_newidle_balance(this_rq);
+       rq_repin_lock(this_rq, rf);
+-- 
+2.43.0
+
diff --git a/queue-5.10/sched-fair-trigger-the-update-of-blocked-load-on-new.patch b/queue-5.10/sched-fair-trigger-the-update-of-blocked-load-on-new.patch
new file mode 100644 (file)
index 0000000..373675a
--- /dev/null
@@ -0,0 +1,131 @@
+From 0f5cd7f09da695d3110904e6c147e94988d78848 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Feb 2021 14:30:06 +0100
+Subject: sched/fair: Trigger the update of blocked load on newly idle cpu
+
+From: Vincent Guittot <vincent.guittot@linaro.org>
+
+[ Upstream commit c6f886546cb8a38617cdbe755fe50d3acd2463e4 ]
+
+Instead of waking up a random and already idle CPU, we can take advantage
+of this_cpu being about to enter idle to run the ILB and update the
+blocked load.
+
+Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
+Link: https://lkml.kernel.org/r/20210224133007.28644-7-vincent.guittot@linaro.org
+Stable-dep-of: ff47a0acfcce ("sched/fair: Check idle_cpu() before need_resched() to detect ilb CPU turning busy")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c  |  2 +-
+ kernel/sched/fair.c  | 24 +++++++++++++++++++++---
+ kernel/sched/idle.c  |  6 ++++++
+ kernel/sched/sched.h |  7 +++++++
+ 4 files changed, 35 insertions(+), 4 deletions(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 8e30041cecf94..1f4bf91c27d22 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -746,7 +746,7 @@ static void nohz_csd_func(void *info)
+       /*
+        * Release the rq::nohz_csd.
+        */
+-      flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu));
++      flags = atomic_fetch_andnot(NOHZ_KICK_MASK | NOHZ_NEWILB_KICK, nohz_flags(cpu));
+       WARN_ON(!(flags & NOHZ_KICK_MASK));
+       rq->idle_balance = idle_cpu(cpu);
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index ab29666eb50ed..8121cfd60b8fb 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -10747,6 +10747,24 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
+       return true;
+ }
++/*
++ * Check if we need to run the ILB for updating blocked load before entering
++ * idle state.
++ */
++void nohz_run_idle_balance(int cpu)
++{
++      unsigned int flags;
++
++      flags = atomic_fetch_andnot(NOHZ_NEWILB_KICK, nohz_flags(cpu));
++
++      /*
++       * Update the blocked load only if no SCHED_SOFTIRQ is about to happen
++       * (ie NOHZ_STATS_KICK set) and will do the same.
++       */
++      if ((flags == NOHZ_NEWILB_KICK) && !need_resched())
++              _nohz_idle_balance(cpu_rq(cpu), NOHZ_STATS_KICK, CPU_IDLE);
++}
++
+ static void nohz_newidle_balance(struct rq *this_rq)
+ {
+       int this_cpu = this_rq->cpu;
+@@ -10768,10 +10786,10 @@ static void nohz_newidle_balance(struct rq *this_rq)
+               return;
+       /*
+-       * Blocked load of idle CPUs need to be updated.
+-       * Kick an ILB to update statistics.
++       * Set the need to trigger ILB in order to update blocked load
++       * before entering idle state.
+        */
+-      kick_ilb(NOHZ_STATS_KICK);
++      atomic_or(NOHZ_NEWILB_KICK, nohz_flags(this_cpu));
+ }
+ #else /* !CONFIG_NO_HZ_COMMON */
+diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
+index 2593a733c0849..cdc3e690de714 100644
+--- a/kernel/sched/idle.c
++++ b/kernel/sched/idle.c
+@@ -261,6 +261,12 @@ static void cpuidle_idle_call(void)
+ static void do_idle(void)
+ {
+       int cpu = smp_processor_id();
++
++      /*
++       * Check if we need to update blocked load
++       */
++      nohz_run_idle_balance(cpu);
++
+       /*
+        * If the arch has a polling bit, we maintain an invariant:
+        *
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index df6cf8aa59f89..66e3ecb7c10e4 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -2292,9 +2292,11 @@ extern void cfs_bandwidth_usage_dec(void);
+ #ifdef CONFIG_NO_HZ_COMMON
+ #define NOHZ_BALANCE_KICK_BIT 0
+ #define NOHZ_STATS_KICK_BIT   1
++#define NOHZ_NEWILB_KICK_BIT  2
+ #define NOHZ_BALANCE_KICK     BIT(NOHZ_BALANCE_KICK_BIT)
+ #define NOHZ_STATS_KICK               BIT(NOHZ_STATS_KICK_BIT)
++#define NOHZ_NEWILB_KICK      BIT(NOHZ_NEWILB_KICK_BIT)
+ #define NOHZ_KICK_MASK        (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)
+@@ -2305,6 +2307,11 @@ extern void nohz_balance_exit_idle(struct rq *rq);
+ static inline void nohz_balance_exit_idle(struct rq *rq) { }
+ #endif
++#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
++extern void nohz_run_idle_balance(int cpu);
++#else
++static inline void nohz_run_idle_balance(int cpu) { }
++#endif
+ #ifdef CONFIG_SMP
+ static inline
+-- 
+2.43.0
+
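
The handshake added by this patch is an atomic fetch-and-clear of NOHZ_NEWILB_KICK on idle entry; a userspace C11 sketch of that flag protocol (flag values as in the patch, the atomics here are illustrative rather than the kernel's):

  #include <stdatomic.h>
  #include <stdio.h>

  #define NOHZ_STATS_KICK   (1U << 1)
  #define NOHZ_NEWILB_KICK  (1U << 2)

  static atomic_uint nohz_flags = NOHZ_NEWILB_KICK;  /* set by nohz_newidle_balance() */

  static void nohz_run_idle_balance_sketch(void)
  {
          unsigned int flags = atomic_fetch_and(&nohz_flags, ~NOHZ_NEWILB_KICK);

          /* Only do the update locally if no other kick (and thus no
           * SCHED_SOFTIRQ that would do the same work) is pending. */
          if (flags == NOHZ_NEWILB_KICK)
                  printf("updating blocked load before entering idle\n");
          else
                  printf("leaving the update to the pending softirq\n");
  }

  int main(void)
  {
          nohz_run_idle_balance_sketch();
          return 0;
  }
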
index c77d8457fdfe9c0f446068c891355374809cff9d..9c9335219e78e10b087811e8dd10dc6ff16640e0 100644 (file)
@@ -432,3 +432,12 @@ usb-chipidea-udc-handle-usb-error-interrupt-if-ioc-n.patch
 mips-loongson64-dts-really-fix-pcie-port-nodes-for-l.patch
 powerpc-prom_init-fixup-missing-powermac-size-cells.patch
 misc-eeprom-eeprom_93cx6-add-quirk-for-extra-read-cl.patch
+sched-core-remove-the-unnecessary-need_resched-check.patch
+sched-fair-remove-update-of-blocked-load-from-newidl.patch
+sched-fair-remove-unused-parameter-of-update_nohz_st.patch
+sched-fair-merge-for-each-idle-cpu-loop-of-ilb.patch
+sched-fair-trigger-the-update-of-blocked-load-on-new.patch
+sched-fair-add-nohz-balancer-flag-for-nohz.next_bala.patch
+sched-fair-check-idle_cpu-before-need_resched-to-det.patch
+sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch
+btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch