+++ /dev/null
-From 5b6547ed97f4f5dfc23f8e3970af6d11d7b7ed7e Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Wed, 16 Mar 2022 22:03:41 +0100
-Subject: sched/core: Fix forceidle balancing
-
-From: Peter Zijlstra <peterz@infradead.org>
-
-commit 5b6547ed97f4f5dfc23f8e3970af6d11d7b7ed7e upstream.
-
-Steve reported that ChromeOS encounters the forceidle balancer being
-run from rt_mutex_setprio()'s balance_callback() invocation and
-explodes.
-
-Now, the forceidle balancer gets queued every time the idle task gets
-selected, via set_next_task(), which is strictly too often.
-rt_mutex_setprio() also uses set_next_task() in the 'change' pattern:
-
- queued = task_on_rq_queued(p); /* p->on_rq == TASK_ON_RQ_QUEUED */
- running = task_current(rq, p); /* rq->curr == p */
-
- if (queued)
- dequeue_task(...);
- if (running)
- put_prev_task(...);
-
- /* change task properties */
-
- if (queued)
- enqueue_task(...);
- if (running)
- set_next_task(...);
-
-However, rt_mutex_setprio() will explicitly not run this pattern on
-the idle task (since priority boosting the idle task is quite insane).
-Most other 'change' pattern users are pidhash based and would also not
-apply to idle.
-
-Also, the change pattern doesn't contain a __balance_callback()
-invocation and hence we could have an out-of-band balance-callback,
-which *should* trigger the WARN in rq_pin_lock() (which guards against
-this exact anti-pattern).
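
(For context, the WARN referred to here is, approximately -- this is an
editorial aside and not shown anywhere in this patch -- the
CONFIG_SCHED_DEBUG check in rq_pin_lock() that fires when a balance
callback is still queued at the point the runqueue lock gets pinned:

    /* inside rq_pin_lock(), kernel/sched/sched.h -- approximate sketch */
    SCHED_WARN_ON(rq->balance_callback &&
                  rq->balance_callback != &balance_push_callback);

i.e. an out-of-band callback left dangling by a 'change' pattern user
would be caught the next time the runqueue is pinned.)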
-
-So while none of that explains how this happens, it does indicate that
-having it in set_next_task() might not be the most robust option.
-
-Instead, explicitly queue the forceidle balancer from pick_next_task()
-when it does indeed result in forceidle selection. Having it here
-ensures it can only be triggered under the __schedule() rq->lock
-instance, and hence must be run from that context.
-
-This also happens to clean up the code a little, so win-win.
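
For reference, a condensed sketch of what the tail of pick_next_task()
looks like with this patch applied (distilled from the hunks below; the
core-pick loop, locking and the non-core fast path are omitted):

    /* tail of pick_next_task(), kernel/sched/core.c -- condensed sketch */
    out_set_next:
        set_next_task(rq, next);
    out:
        /*
         * Arm the forceidle balancer only when this pick actually
         * resulted in forced idle, and hence only under the
         * __schedule() rq->lock instance.
         */
        if (rq->core->core_forceidle_count && next == rq->idle)
            queue_core_balance(rq);

        return next;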
-
-Fixes: d2dfa17bc7de ("sched: Trivial forced-newidle balancer")
-Reported-by: Steven Rostedt <rostedt@goodmis.org>
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Tested-by: T.J. Alumbaugh <talumbau@chromium.org>
-Link: https://lkml.kernel.org/r/20220330160535.GN8939@worktop.programming.kicks-ass.net
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/sched/core.c | 14 ++++++++++----
- kernel/sched/idle.c | 1 -
- kernel/sched/sched.h | 6 ------
- 3 files changed, 10 insertions(+), 11 deletions(-)
-
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -5669,6 +5669,8 @@ pick_task(struct rq *rq, const struct sc
-
- extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
-
-+static void queue_core_balance(struct rq *rq);
-+
- static struct task_struct *
- pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
- {
-@@ -5716,7 +5718,7 @@ pick_next_task(struct rq *rq, struct tas
- }
-
- rq->core_pick = NULL;
-- return next;
-+ goto out;
- }
-
- put_prev_task_balance(rq, prev, rf);
-@@ -5763,7 +5765,7 @@ pick_next_task(struct rq *rq, struct tas
- */
- WARN_ON_ONCE(fi_before);
- task_vruntime_update(rq, next, false);
-- goto done;
-+ goto out_set_next;
- }
- }
-
-@@ -5897,8 +5899,12 @@ again:
- resched_curr(rq_i);
- }
-
--done:
-+out_set_next:
- set_next_task(rq, next);
-+out:
-+ if (rq->core->core_forceidle_count && next == rq->idle)
-+ queue_core_balance(rq);
-+
- return next;
- }
-
-@@ -5993,7 +5999,7 @@ static void sched_core_balance(struct rq
-
- static DEFINE_PER_CPU(struct callback_head, core_balance_head);
-
--void queue_core_balance(struct rq *rq)
-+static void queue_core_balance(struct rq *rq)
- {
- if (!sched_core_enabled(rq))
- return;
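
The remainder of queue_core_balance() falls outside this hunk's context;
presumably (an assumption based on the per-CPU core_balance_head declared
above, not something shown in the patch) it ends by arming the per-CPU
balance callback along the lines of:

    /* presumed tail of queue_core_balance(), kernel/sched/core.c */
    queue_balance_callback(rq, &per_cpu(core_balance_head, rq->cpu),
                           sched_core_balance);

Since balance callbacks only run under the rq->lock once the scheduler
(or another balance_callbacks() caller) flushes them, arming this one
exclusively from pick_next_task() keeps sched_core_balance() tied to the
__schedule() path, which is the point of the fix.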
---- a/kernel/sched/idle.c
-+++ b/kernel/sched/idle.c
-@@ -437,7 +437,6 @@ static void set_next_task_idle(struct rq
- {
- update_idle_core(rq);
- schedstat_inc(rq->sched_goidle);
-- queue_core_balance(rq);
- }
-
- #ifdef CONFIG_SMP
---- a/kernel/sched/sched.h
-+++ b/kernel/sched/sched.h
-@@ -1242,8 +1242,6 @@ static inline bool sched_group_cookie_ma
- return false;
- }
-
--extern void queue_core_balance(struct rq *rq);
--
- static inline bool sched_core_enqueued(struct task_struct *p)
- {
- return !RB_EMPTY_NODE(&p->core_node);
-@@ -1282,10 +1280,6 @@ static inline raw_spinlock_t *__rq_lockp
- return &rq->__lock;
- }
-
--static inline void queue_core_balance(struct rq *rq)
--{
--}
--
- static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
- {
- return true;
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
-@@ -5933,7 +5933,7 @@ static bool try_steal_cookie(int this, i
+@@ -5927,7 +5927,7 @@ static bool try_steal_cookie(int this, i
if (p == src->core_pick || p == src->curr)
goto next;
mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch
drivers-hv-vmbus-replace-smp_store_mb-with-virt_store_mb.patch
x86-bug-prevent-shadowing-in-__warn_flags.patch
-sched-core-fix-forceidle-balancing.patch
sched-teach-the-forced-newidle-balancer-about-cpu-affinity-limitation.patch
x86-static_call-fix-__static_call_return0-for-i386.patch
irqchip-gic-v4-wait-for-gicr_vpendbaser.dirty-to-clear-before-descheduling.patch
+++ /dev/null
-From 5b6547ed97f4f5dfc23f8e3970af6d11d7b7ed7e Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Wed, 16 Mar 2022 22:03:41 +0100
-Subject: sched/core: Fix forceidle balancing
-
-From: Peter Zijlstra <peterz@infradead.org>
-
-commit 5b6547ed97f4f5dfc23f8e3970af6d11d7b7ed7e upstream.
-
-Steve reported that ChromeOS encounters the forceidle balancer being
-run from rt_mutex_setprio()'s balance_callback() invocation and
-explodes.
-
-Now, the forceidle balancer gets queued every time the idle task gets
-selected, via set_next_task(), which is strictly too often.
-rt_mutex_setprio() also uses set_next_task() in the 'change' pattern:
-
- queued = task_on_rq_queued(p); /* p->on_rq == TASK_ON_RQ_QUEUED */
- running = task_current(rq, p); /* rq->curr == p */
-
- if (queued)
- dequeue_task(...);
- if (running)
- put_prev_task(...);
-
- /* change task properties */
-
- if (queued)
- enqueue_task(...);
- if (running)
- set_next_task(...);
-
-However, rt_mutex_setprio() will explicitly not run this pattern on
-the idle task (since priority boosting the idle task is quite insane).
-Most other 'change' pattern users are pidhash based and would also not
-apply to idle.
-
-Also, the change pattern doesn't contain a __balance_callback()
-invocation and hence we could have an out-of-band balance-callback,
-which *should* trigger the WARN in rq_pin_lock() (which guards against
-this exact anti-pattern).
-
-So while none of that explains how this happens, it does indicate that
-having it in set_next_task() might not be the most robust option.
-
-Instead, explicitly queue the forceidle balancer from pick_next_task()
-when it does indeed result in forceidle selection. Having it here
-ensures it can only be triggered under the __schedule() rq->lock
-instance, and hence must be run from that context.
-
-This also happens to clean up the code a little, so win-win.
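
To make the hazard concrete: in the 'change' pattern quoted earlier, the
final step is the one that used to misfire (the annotation below is
editorial, tying the pattern to the idle.c hunk further down):

    if (running)
        set_next_task(...);    /* pre-patch: for p == rq->idle this reached
                                * set_next_task_idle() -> queue_core_balance(),
                                * arming a balance callback outside __schedule()
                                */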
-
-Fixes: d2dfa17bc7de ("sched: Trivial forced-newidle balancer")
-Reported-by: Steven Rostedt <rostedt@goodmis.org>
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Tested-by: T.J. Alumbaugh <talumbau@chromium.org>
-Link: https://lkml.kernel.org/r/20220330160535.GN8939@worktop.programming.kicks-ass.net
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/sched/core.c | 14 ++++++++++----
- kernel/sched/idle.c | 1 -
- kernel/sched/sched.h | 6 ------
- 3 files changed, 10 insertions(+), 11 deletions(-)
-
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -5660,6 +5660,8 @@ static inline struct task_struct *pick_t
-
- extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
-
-+static void queue_core_balance(struct rq *rq);
-+
- static struct task_struct *
- pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
- {
-@@ -5708,7 +5710,7 @@ pick_next_task(struct rq *rq, struct tas
- }
-
- rq->core_pick = NULL;
-- return next;
-+ goto out;
- }
-
- put_prev_task_balance(rq, prev, rf);
-@@ -5750,7 +5752,7 @@ pick_next_task(struct rq *rq, struct tas
- */
- WARN_ON_ONCE(fi_before);
- task_vruntime_update(rq, next, false);
-- goto done;
-+ goto out_set_next;
- }
- }
-
-@@ -5859,8 +5861,12 @@ pick_next_task(struct rq *rq, struct tas
- resched_curr(rq_i);
- }
-
--done:
-+out_set_next:
- set_next_task(rq, next);
-+out:
-+ if (rq->core->core_forceidle_count && next == rq->idle)
-+ queue_core_balance(rq);
-+
- return next;
- }
-
-@@ -5955,7 +5961,7 @@ static void sched_core_balance(struct rq
-
- static DEFINE_PER_CPU(struct callback_head, core_balance_head);
-
--void queue_core_balance(struct rq *rq)
-+static void queue_core_balance(struct rq *rq)
- {
- if (!sched_core_enabled(rq))
- return;
---- a/kernel/sched/idle.c
-+++ b/kernel/sched/idle.c
-@@ -437,7 +437,6 @@ static void set_next_task_idle(struct rq
- {
- update_idle_core(rq);
- schedstat_inc(rq->sched_goidle);
-- queue_core_balance(rq);
- }
-
- #ifdef CONFIG_SMP
---- a/kernel/sched/sched.h
-+++ b/kernel/sched/sched.h
-@@ -1245,8 +1245,6 @@ static inline bool sched_group_cookie_ma
- return false;
- }
-
--extern void queue_core_balance(struct rq *rq);
--
- static inline bool sched_core_enqueued(struct task_struct *p)
- {
- return !RB_EMPTY_NODE(&p->core_node);
-@@ -1280,10 +1278,6 @@ static inline raw_spinlock_t *__rq_lockp
- return &rq->__lock;
- }
-
--static inline void queue_core_balance(struct rq *rq)
--{
--}
--
- static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
- {
- return true;
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
-@@ -5895,7 +5895,7 @@ static bool try_steal_cookie(int this, i
+@@ -5889,7 +5889,7 @@ static bool try_steal_cookie(int this, i
if (p == src->core_pick || p == src->curr)
goto next;
drivers-hv-vmbus-replace-smp_store_mb-with-virt_store_mb.patch
revert-powerpc-set-max_mapnr-correctly.patch
x86-bug-prevent-shadowing-in-__warn_flags.patch
-sched-core-fix-forceidle-balancing.patch
sched-teach-the-forced-newidle-balancer-about-cpu-affinity-limitation.patch
x86-static_call-fix-__static_call_return0-for-i386.patch
irqchip-gic-v4-wait-for-gicr_vpendbaser.dirty-to-clear-before-descheduling.patch