]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.6-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 16 Jul 2024 14:16:29 +0000 (16:16 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 16 Jul 2024 14:16:29 +0000 (16:16 +0200)
added patches:
sched-move-psi_account_irqtime-out-of-update_rq_clock_task-hotpath.patch

queue-6.6/sched-move-psi_account_irqtime-out-of-update_rq_clock_task-hotpath.patch [new file with mode: 0644]
queue-6.6/series

diff --git a/queue-6.6/sched-move-psi_account_irqtime-out-of-update_rq_clock_task-hotpath.patch b/queue-6.6/sched-move-psi_account_irqtime-out-of-update_rq_clock_task-hotpath.patch
new file mode 100644 (file)
index 0000000..288f3e4
--- /dev/null
@@ -0,0 +1,186 @@
+From ddae0ca2a8fe12d0e24ab10ba759c3fbd755ada8 Mon Sep 17 00:00:00 2001
+From: John Stultz <jstultz@google.com>
+Date: Tue, 18 Jun 2024 14:58:55 -0700
+Subject: sched: Move psi_account_irqtime() out of update_rq_clock_task() hotpath
+
+From: John Stultz <jstultz@google.com>
+
+commit ddae0ca2a8fe12d0e24ab10ba759c3fbd755ada8 upstream.
+
+It was reported that in moving to 6.1, a larger then 10%
+regression was seen in the performance of
+clock_gettime(CLOCK_THREAD_CPUTIME_ID,...).
+
+Using a simple reproducer, I found:
+5.10:
+100000000 calls in 24345994193 ns => 243.460 ns per call
+100000000 calls in 24288172050 ns => 242.882 ns per call
+100000000 calls in 24289135225 ns => 242.891 ns per call
+
+6.1:
+100000000 calls in 28248646742 ns => 282.486 ns per call
+100000000 calls in 28227055067 ns => 282.271 ns per call
+100000000 calls in 28177471287 ns => 281.775 ns per call
+
+The cause of this was finally narrowed down to the addition of
+psi_account_irqtime() in update_rq_clock_task(), in commit
+52b1364ba0b1 ("sched/psi: Add PSI_IRQ to track IRQ/SOFTIRQ
+pressure").
+
+In my initial attempt to resolve this, I leaned towards moving
+all accounting work out of the clock_gettime() call path, but it
+wasn't very pretty, so it will have to wait for a later deeper
+rework. Instead, Peter shared this approach:
+
+Rework psi_account_irqtime() to use its own psi_irq_time base
+for accounting, and move it out of the hotpath, calling it
+instead from sched_tick() and __schedule().
+
+In testing this, we found the importance of ensuring
+psi_account_irqtime() is run under the rq_lock, which Johannes
+Weiner helpfully explained, so also add some lockdep annotations
+to make that requirement clear.
+
+With this change the performance is back in-line with 5.10:
+6.1+fix:
+100000000 calls in 24297324597 ns => 242.973 ns per call
+100000000 calls in 24318869234 ns => 243.189 ns per call
+100000000 calls in 24291564588 ns => 242.916 ns per call
+
+Reported-by: Jimmy Shiu <jimmyshiu@google.com>
+Originally-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: John Stultz <jstultz@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev>
+Reviewed-by: Qais Yousef <qyousef@layalina.io>
+Link: https://lore.kernel.org/r/20240618215909.4099720-1-jstultz@google.com
+Fixes: 52b1364ba0b1 ("sched/psi: Add PSI_IRQ to track IRQ/SOFTIRQ pressure")
+[jstultz: Fixed up minor collisions w/ 6.6-stable]
+Signed-off-by: John Stultz <jstultz@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/core.c  |    7 +++++--
+ kernel/sched/psi.c   |   21 ++++++++++++++++-----
+ kernel/sched/sched.h |    1 +
+ kernel/sched/stats.h |   11 ++++++++---
+ 4 files changed, 30 insertions(+), 10 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -722,7 +722,6 @@ static void update_rq_clock_task(struct
+       rq->prev_irq_time += irq_delta;
+       delta -= irq_delta;
+-      psi_account_irqtime(rq->curr, irq_delta);
+       delayacct_irq(rq->curr, irq_delta);
+ #endif
+ #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+@@ -5641,7 +5640,7 @@ void scheduler_tick(void)
+ {
+       int cpu = smp_processor_id();
+       struct rq *rq = cpu_rq(cpu);
+-      struct task_struct *curr = rq->curr;
++      struct task_struct *curr;
+       struct rq_flags rf;
+       unsigned long thermal_pressure;
+       u64 resched_latency;
+@@ -5653,6 +5652,9 @@ void scheduler_tick(void)
+       rq_lock(rq, &rf);
++      curr = rq->curr;
++      psi_account_irqtime(rq, curr, NULL);
++
+       update_rq_clock(rq);
+       thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));
+       update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure);
+@@ -6690,6 +6692,7 @@ static void __sched notrace __schedule(u
+               ++*switch_count;
+               migrate_disable_switch(rq, prev);
++              psi_account_irqtime(rq, prev, next);
+               psi_sched_switch(prev, next, !task_on_rq_queued(prev));
+               trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state);
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -784,6 +784,7 @@ static void psi_group_change(struct psi_
+       enum psi_states s;
+       u32 state_mask;
++      lockdep_assert_rq_held(cpu_rq(cpu));
+       groupc = per_cpu_ptr(group->pcpu, cpu);
+       /*
+@@ -1002,19 +1003,29 @@ void psi_task_switch(struct task_struct
+ }
+ #ifdef CONFIG_IRQ_TIME_ACCOUNTING
+-void psi_account_irqtime(struct task_struct *task, u32 delta)
++void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev)
+ {
+-      int cpu = task_cpu(task);
++      int cpu = task_cpu(curr);
+       struct psi_group *group;
+       struct psi_group_cpu *groupc;
+-      u64 now;
++      u64 now, irq;
++      s64 delta;
+-      if (!task->pid)
++      if (!curr->pid)
++              return;
++
++      lockdep_assert_rq_held(rq);
++      group = task_psi_group(curr);
++      if (prev && task_psi_group(prev) == group)
+               return;
+       now = cpu_clock(cpu);
++      irq = irq_time_read(cpu);
++      delta = (s64)(irq - rq->psi_irq_time);
++      if (delta < 0)
++              return;
++      rq->psi_irq_time = irq;
+-      group = task_psi_group(task);
+       do {
+               if (!group->enabled)
+                       continue;
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -1094,6 +1094,7 @@ struct rq {
+ #ifdef CONFIG_IRQ_TIME_ACCOUNTING
+       u64                     prev_irq_time;
++      u64                     psi_irq_time;
+ #endif
+ #ifdef CONFIG_PARAVIRT
+       u64                     prev_steal_time;
+--- a/kernel/sched/stats.h
++++ b/kernel/sched/stats.h
+@@ -110,8 +110,12 @@ __schedstats_from_se(struct sched_entity
+ void psi_task_change(struct task_struct *task, int clear, int set);
+ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
+                    bool sleep);
+-void psi_account_irqtime(struct task_struct *task, u32 delta);
+-
++#ifdef CONFIG_IRQ_TIME_ACCOUNTING
++void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev);
++#else
++static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
++                                     struct task_struct *prev) {}
++#endif /*CONFIG_IRQ_TIME_ACCOUNTING */
+ /*
+  * PSI tracks state that persists across sleeps, such as iowaits and
+  * memory stalls. As a result, it has to distinguish between sleeps,
+@@ -192,7 +196,8 @@ static inline void psi_ttwu_dequeue(stru
+ static inline void psi_sched_switch(struct task_struct *prev,
+                                   struct task_struct *next,
+                                   bool sleep) {}
+-static inline void psi_account_irqtime(struct task_struct *task, u32 delta) {}
++static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
++                                     struct task_struct *prev) {}
+ #endif /* CONFIG_PSI */
+ #ifdef CONFIG_SCHED_INFO
index b2f0789551423fe62f517a8d9e37c48b675c7326..bbf186e4a32f480823f3446b7e2a937eecf624e1 100644 (file)
@@ -105,3 +105,4 @@ mm-damon-core-merge-regions-aggressively-when-max_nr_regions-is-unmet.patch
 selftests-net-fix-gro.c-compilation-failure-due-to-non-existent-opt_ipproto_off.patch
 nilfs2-fix-kernel-bug-on-rename-operation-of-broken-directory.patch
 ext4-avoid-ptr-null-pointer-dereference.patch
+sched-move-psi_account_irqtime-out-of-update_rq_clock_task-hotpath.patch