From: Greg Kroah-Hartman Date: Mon, 24 Mar 2025 20:50:34 +0000 (-0400) Subject: 6.12-stable patches X-Git-Tag: v6.1.132~18 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7036f7d1adf2b6e5617e626cc6d82310ecbfbbd1;p=thirdparty%2Fkernel%2Fstable-queue.git 6.12-stable patches added patches: revert-sched-core-reduce-cost-of-sched_move_task-when-config-autogroup.patch --- diff --git a/queue-6.12/revert-sched-core-reduce-cost-of-sched_move_task-when-config-autogroup.patch b/queue-6.12/revert-sched-core-reduce-cost-of-sched_move_task-when-config-autogroup.patch new file mode 100644 index 0000000000..7f29e3ff04 --- /dev/null +++ b/queue-6.12/revert-sched-core-reduce-cost-of-sched_move_task-when-config-autogroup.patch @@ -0,0 +1,185 @@ +From 76f970ce51c80f625eb6ddbb24e9cb51b977b598 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann +Date: Fri, 14 Mar 2025 16:13:45 +0100 +Subject: Revert "sched/core: Reduce cost of sched_move_task when config autogroup" + +From: Dietmar Eggemann + +commit 76f970ce51c80f625eb6ddbb24e9cb51b977b598 upstream. + +This reverts commit eff6c8ce8d4d7faef75f66614dd20bb50595d261. + +Hazem reported a 30% drop in UnixBench spawn test with commit +eff6c8ce8d4d ("sched/core: Reduce cost of sched_move_task when config +autogroup") on a m6g.xlarge AWS EC2 instance with 4 vCPUs and 16 GiB RAM +(aarch64) (single level MC sched domain): + + https://lkml.kernel.org/r/20250205151026.13061-1-hagarhem@amazon.com + +There is an early bail from sched_move_task() if p->sched_task_group is +equal to p's 'cpu cgroup' (sched_get_task_group()). E.g. both are +pointing to taskgroup '/user.slice/user-1000.slice/session-1.scope' +(Ubuntu '22.04.5 LTS'). 
+ +So in: + + do_exit() + + sched_autogroup_exit_task() + + sched_move_task() + + if sched_get_task_group(p) == p->sched_task_group + return + + /* p is enqueued */ + dequeue_task() \ + sched_change_group() | + task_change_group_fair() | + detach_task_cfs_rq() | (1) + set_task_rq() | + attach_task_cfs_rq() | + enqueue_task() / + +(1) isn't called for p anymore. + +Turns out that the regression is related to sgs->group_util in +group_is_overloaded() and group_has_capacity(). If (1) isn't called for +all the 'spawn' tasks then sgs->group_util is ~900 and +sgs->group_capacity = 1024 (single CPU sched domain) and this leads to +group_is_overloaded() returning true (2) and group_has_capacity() false +(3) much more often compared to the case when (1) is called. + +I.e. there are many more cases of 'group_is_overloaded' and +'group_fully_busy' in WF_FORK wakeup sched_balance_find_dst_cpu() which +then much more often returns a CPU != smp_processor_id() (5). + +This isn't good for these extremely short-running tasks (FORK + EXIT) +and also involves calling sched_balance_find_dst_group_cpu() unnecessarily +(single CPU sched domain). + +Instead if (1) is called for 'p->flags & PF_EXITING' then the path +(4),(6) is taken much more often. + + select_task_rq_fair(..., wake_flags = WF_FORK) + + cpu = smp_processor_id() + + new_cpu = sched_balance_find_dst_cpu(..., cpu, ...) + + group = sched_balance_find_dst_group(..., cpu) + + do { + + update_sg_wakeup_stats() + + sgs->group_type = group_classify() + + if group_is_overloaded() (2) + return group_overloaded + + if !group_has_capacity() (3) + return group_fully_busy + + return group_has_spare (4) + + } while group + + if local_sgs.group_type > idlest_sgs.group_type + return idlest (5) + + case group_has_spare: + + if local_sgs.idle_cpus >= idlest_sgs.idle_cpus + return NULL (6) + +Unixbench Tests './Run -c 4 spawn' on: + +(a) VM AWS instance (m7gd.16xlarge) with v6.13 ('maxcpus=4 nr_cpus=4') + and Ubuntu 22.04.5 LTS (aarch64). 
+ + Shell & test run in '/user.slice/user-1000.slice/session-1.scope'. + + w/o patch w/ patch + 21005 27120 + +(b) i7-13700K with tip/sched/core ('nosmt maxcpus=8 nr_cpus=8') and + Ubuntu 22.04.5 LTS (x86_64). + + Shell & test run in '/A'. + + w/o patch w/ patch + 67675 88806 + +CONFIG_SCHED_AUTOGROUP=y & /proc/sys/kernel/sched_autogroup_enabled equal +0 or 1. + +Reported-by: Hazem Mohamed Abuelfotoh +Signed-off-by: Dietmar Eggemann +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Ingo Molnar +Reviewed-by: Vincent Guittot +Tested-by: Hagar Hemdan +Cc: Linus Torvalds +Link: https://lore.kernel.org/r/20250314151345.275739-1-dietmar.eggemann@arm.com +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/core.c | 21 +++------------------ + 1 file changed, 3 insertions(+), 18 deletions(-) + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -8919,7 +8919,7 @@ void sched_release_group(struct task_gro + spin_unlock_irqrestore(&task_group_lock, flags); + } + +-static struct task_group *sched_get_task_group(struct task_struct *tsk) ++static void sched_change_group(struct task_struct *tsk) + { + struct task_group *tg; + +@@ -8931,13 +8931,7 @@ static struct task_group *sched_get_task + tg = container_of(task_css_check(tsk, cpu_cgrp_id, true), + struct task_group, css); + tg = autogroup_task_group(tsk, tg); +- +- return tg; +-} +- +-static void sched_change_group(struct task_struct *tsk, struct task_group *group) +-{ +- tsk->sched_task_group = group; ++ tsk->sched_task_group = tg; + + #ifdef CONFIG_FAIR_GROUP_SCHED + if (tsk->sched_class->task_change_group) +@@ -8958,20 +8952,11 @@ void sched_move_task(struct task_struct + { + int queued, running, queue_flags = + DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; +- struct task_group *group; + struct rq *rq; + + CLASS(task_rq_lock, rq_guard)(tsk); + rq = rq_guard.rq; + +- /* +- * Esp. with SCHED_AUTOGROUP enabled it is possible to get superfluous +- * group changes. 
+- */ +- group = sched_get_task_group(tsk); +- if (group == tsk->sched_task_group) +- return; +- + update_rq_clock(rq); + + running = task_current(rq, tsk); +@@ -8982,7 +8967,7 @@ void sched_move_task(struct task_struct + if (running) + put_prev_task(rq, tsk); + +- sched_change_group(tsk, group); ++ sched_change_group(tsk); + if (!for_autogroup) + scx_cgroup_move_task(tsk); + diff --git a/queue-6.12/series b/queue-6.12/series index fa35e27f35..3918994fa3 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -109,3 +109,4 @@ kvm-arm64-remove-vhe-host-restore-of-cpacr_el1.smen.patch kvm-arm64-refactor-exit-handlers.patch kvm-arm64-mark-some-header-functions-as-inline.patch kvm-arm64-eagerly-switch-zcr_el-1-2.patch +revert-sched-core-reduce-cost-of-sched_move_task-when-config-autogroup.patch