git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.6
author Sasha Levin <sashal@kernel.org>
Thu, 12 Dec 2024 01:10:52 +0000 (20:10 -0500)
committer Sasha Levin <sashal@kernel.org>
Thu, 12 Dec 2024 01:10:52 +0000 (20:10 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
17 files changed:
queue-6.6/btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch [new file with mode: 0644]
queue-6.6/clk-en7523-initialize-num-before-accessing-hws-in-en.patch [new file with mode: 0644]
queue-6.6/sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch [new file with mode: 0644]
queue-6.6/sched-core-remove-the-unnecessary-need_resched-check.patch [new file with mode: 0644]
queue-6.6/sched-deadline-collect-sched_dl_entity-initializatio.patch [new file with mode: 0644]
queue-6.6/sched-deadline-fix-warning-in-migrate_enable-for-boo.patch [new file with mode: 0644]
queue-6.6/sched-deadline-move-bandwidth-accounting-into-en-de-.patch [new file with mode: 0644]
queue-6.6/sched-fair-check-idle_cpu-before-need_resched-to-det.patch [new file with mode: 0644]
queue-6.6/sched-fair-rename-check_preempt_curr-to-wakeup_preem.patch [new file with mode: 0644]
queue-6.6/sched-fair-rename-check_preempt_wakeup-to-check_pree.patch [new file with mode: 0644]
queue-6.6/sched-headers-move-struct-sched_param-out-of-uapi-to.patch [new file with mode: 0644]
queue-6.6/sched-remove-vruntime-from-trace_sched_stat_runtime.patch [new file with mode: 0644]
queue-6.6/sched-unify-more-update_curr.patch [new file with mode: 0644]
queue-6.6/sched-unify-runtime-accounting-across-classes.patch [new file with mode: 0644]
queue-6.6/series
queue-6.6/tracing-eprobe-fix-to-release-eprobe-when-failed-to-.patch [new file with mode: 0644]
queue-6.6/x86-fix-build-regression-with-config_kexec_jump-enab.patch [new file with mode: 0644]

diff --git a/queue-6.6/btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch b/queue-6.6/btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch
new file mode 100644 (file)
index 0000000..3bdb466
--- /dev/null
@@ -0,0 +1,41 @@
+From 1605e10745f6c295c736fb38beb4354cdf7ba85c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Nov 2024 13:33:03 +0000
+Subject: btrfs: fix missing snapshot drew unlock when root is dead during swap
+ activation
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 9c803c474c6c002d8ade68ebe99026cc39c37f85 ]
+
+When activating a swap file we acquire the root's snapshot drew lock and
+then check if the root is dead, failing and returning with -EPERM if it's
+dead but without unlocking the root's snapshot lock. Fix this by adding
+the missing unlock.
+
+Fixes: 60021bd754c6 ("btrfs: prevent subvol with swapfile from being deleted")
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/inode.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index ea19ea75674d2..035815c439498 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -10693,6 +10693,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
+       if (btrfs_root_dead(root)) {
+               spin_unlock(&root->root_item_lock);
++              btrfs_drew_write_unlock(&root->snapshot_lock);
+               btrfs_exclop_finish(fs_info);
+               btrfs_warn(fs_info,
+               "cannot activate swapfile because subvolume %llu is being deleted",
+-- 
+2.43.0
+
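For illustration only, a minimal userspace sketch of the locking pattern the fix above restores: every early return must release each lock taken before it, in reverse order. pthread locks stand in for the btrfs snapshot drew lock and root_item_lock; all names below are invented for the example and are not the btrfs code itself.

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t snapshot_lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t  item_lock     = PTHREAD_MUTEX_INITIALIZER;
static bool root_dead = true;                    /* force the error path */

static int activate_swapfile(void)
{
        pthread_rwlock_wrlock(&snapshot_lock);   /* "snapshot drew" lock */
        pthread_mutex_lock(&item_lock);

        if (root_dead) {
                /* error path: drop *both* locks before returning */
                pthread_mutex_unlock(&item_lock);
                pthread_rwlock_unlock(&snapshot_lock); /* the unlock the fix adds */
                return -EPERM;
        }

        pthread_mutex_unlock(&item_lock);
        /* ... swap activation work under snapshot_lock ... */
        pthread_rwlock_unlock(&snapshot_lock);
        return 0;
}

int main(void)
{
        printf("activate_swapfile() = %d\n", activate_swapfile());
        return 0;
}
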
diff --git a/queue-6.6/clk-en7523-initialize-num-before-accessing-hws-in-en.patch b/queue-6.6/clk-en7523-initialize-num-before-accessing-hws-in-en.patch
new file mode 100644 (file)
index 0000000..593a60b
--- /dev/null
@@ -0,0 +1,49 @@
+From 2ba38c2e1a95aad9e06d890de1806810f99a4cfa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Dec 2024 22:29:15 +0800
+Subject: clk: en7523: Initialize num before accessing hws in
+ en7523_register_clocks()
+
+From: Haoyu Li <lihaoyu499@gmail.com>
+
+[ Upstream commit 52fd1709e41d3a85b48bcfe2404a024ebaf30c3b ]
+
+With the new __counted_by annotation in clk_hw_onecell_data, the "num"
+struct member must be set before accessing the "hws" array. Failing to
+do so will trigger a runtime warning when enabling CONFIG_UBSAN_BOUNDS
+and CONFIG_FORTIFY_SOURCE.
+
+Fixes: f316cdff8d67 ("clk: Annotate struct clk_hw_onecell_data with __counted_by")
+Signed-off-by: Haoyu Li <lihaoyu499@gmail.com>
+Link: https://lore.kernel.org/r/20241203142915.345523-1-lihaoyu499@gmail.com
+Signed-off-by: Stephen Boyd <sboyd@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/clk/clk-en7523.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/clk/clk-en7523.c b/drivers/clk/clk-en7523.c
+index 7914e60f3d6c5..1331b4bacf0b6 100644
+--- a/drivers/clk/clk-en7523.c
++++ b/drivers/clk/clk-en7523.c
+@@ -284,6 +284,8 @@ static void en7523_register_clocks(struct device *dev, struct clk_hw_onecell_dat
+       u32 rate;
+       int i;
++      clk_data->num = EN7523_NUM_CLOCKS;
++
+       for (i = 0; i < ARRAY_SIZE(en7523_base_clks); i++) {
+               const struct en_clk_desc *desc = &en7523_base_clks[i];
+@@ -302,8 +304,6 @@ static void en7523_register_clocks(struct device *dev, struct clk_hw_onecell_dat
+       hw = en7523_register_pcie_clk(dev, np_base);
+       clk_data->hws[EN7523_CLK_PCIE] = hw;
+-
+-      clk_data->num = EN7523_NUM_CLOCKS;
+ }
+ static int en7523_clk_probe(struct platform_device *pdev)
+-- 
+2.43.0
+
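For illustration, a compile-anywhere userspace sketch of the __counted_by constraint behind the fix above: once a flexible array is annotated with its counter member, bounds checks use that counter, so it must be populated before any element is written. The struct and fallback macro only mirror clk_hw_onecell_data and the kernel's compiler_attributes.h pattern; they are not the real headers.

#include <stdio.h>
#include <stdlib.h>

#if defined(__has_attribute)
# if __has_attribute(__counted_by__)
#  define counted_by(m) __attribute__((__counted_by__(m)))
# endif
#endif
#ifndef counted_by
# define counted_by(m) /* attribute unsupported: plain flexible array */
#endif

struct onecell_data {                    /* mirrors clk_hw_onecell_data */
        unsigned int num;                /* __counted_by counter        */
        void *hws[] counted_by(num);     /* bounds-checked against num  */
};

int main(void)
{
        enum { NUM_CLOCKS = 8 };         /* stand-in for EN7523_NUM_CLOCKS */
        struct onecell_data *d =
                malloc(sizeof(*d) + NUM_CLOCKS * sizeof(d->hws[0]));
        if (!d)
                return 1;

        d->num = NUM_CLOCKS;             /* must be set first ...          */
        for (unsigned int i = 0; i < NUM_CLOCKS; i++)
                d->hws[i] = NULL;        /* ... before hws[] is written    */

        printf("prepared %u clock slots\n", d->num);
        free(d);
        return 0;
}
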
diff --git a/queue-6.6/sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch b/queue-6.6/sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch
new file mode 100644 (file)
index 0000000..a6f43ca
--- /dev/null
@@ -0,0 +1,71 @@
+From 75b03fdc8dab8e64d761c690fa62dcc38e12d891 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Nov 2024 05:44:32 +0000
+Subject: sched/core: Prevent wakeup of ksoftirqd during idle load balance
+
+From: K Prateek Nayak <kprateek.nayak@amd.com>
+
+[ Upstream commit e932c4ab38f072ce5894b2851fea8bc5754bb8e5 ]
+
+The scheduler raises a SCHED_SOFTIRQ to trigger a load balancing event
+from the IPI handler on the idle CPU. If the SMP function is invoked
+from an idle CPU via flush_smp_call_function_queue() then the HARD-IRQ
+flag is not set and raise_softirq_irqoff() needlessly wakes ksoftirqd
+because soft interrupts are handled before ksoftirqd gets on the CPU.
+
+Adding a trace_printk() in nohz_csd_func() at the spot of raising
+SCHED_SOFTIRQ and enabling trace events for sched_switch, sched_wakeup,
+and softirq_entry (for SCHED_SOFTIRQ vector alone) helps observing the
+current behavior:
+
+       <idle>-0   [000] dN.1.:  nohz_csd_func: Raising SCHED_SOFTIRQ from nohz_csd_func
+       <idle>-0   [000] dN.4.:  sched_wakeup: comm=ksoftirqd/0 pid=16 prio=120 target_cpu=000
+       <idle>-0   [000] .Ns1.:  softirq_entry: vec=7 [action=SCHED]
+       <idle>-0   [000] .Ns1.:  softirq_exit: vec=7  [action=SCHED]
+       <idle>-0   [000] d..2.:  sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=ksoftirqd/0 next_pid=16 next_prio=120
+  ksoftirqd/0-16  [000] d..2.:  sched_switch: prev_comm=ksoftirqd/0 prev_pid=16 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120
+       ...
+
+Use __raise_softirq_irqoff() to raise the softirq. The SMP function call
+is always invoked on the requested CPU in an interrupt handler. It is
+guaranteed that soft interrupts are handled at the end.
+
+Following are the observations with the changes when enabling the same
+set of events:
+
+       <idle>-0       [000] dN.1.: nohz_csd_func: Raising SCHED_SOFTIRQ for nohz_idle_balance
+       <idle>-0       [000] dN.1.: softirq_raise: vec=7 [action=SCHED]
+       <idle>-0       [000] .Ns1.: softirq_entry: vec=7 [action=SCHED]
+
+No unnecessary ksoftirqd wakeups are seen from idle task's context to
+service the softirq.
+
+Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()")
+Closes: https://lore.kernel.org/lkml/fcf823f-195e-6c9a-eac3-25f870cb35ac@inria.fr/ [1]
+Reported-by: Julia Lawall <julia.lawall@inria.fr>
+Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20241119054432.6405-5-kprateek.nayak@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 8abd1bf31864e..da14c7450156b 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1183,7 +1183,7 @@ static void nohz_csd_func(void *info)
+       rq->idle_balance = idle_cpu(cpu);
+       if (rq->idle_balance) {
+               rq->nohz_idle_balance = flags;
+-              raise_softirq_irqoff(SCHED_SOFTIRQ);
++              __raise_softirq_irqoff(SCHED_SOFTIRQ);
+       }
+ }
+-- 
+2.43.0
+
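For illustration, a minimal userspace model of the behavioural difference the patch above relies on: raise_softirq_irqoff() wakes ksoftirqd when the caller is not in hard-IRQ context, while __raise_softirq_irqoff() only marks the softirq pending and leaves servicing to the caller's own softirq-flush path. The real functions live in kernel/softirq.c; the helper names below are invented.

#include <stdbool.h>
#include <stdio.h>

#define SCHED_SOFTIRQ 7

static unsigned long pending;
static bool in_hardirq;          /* models "HARD-IRQ flag is not set" above */

static void wakeup_ksoftirqd(void)
{
        puts("wakeup ksoftirqd (extra wakeup + context switch on an idle CPU)");
}

static void model__raise_softirq_irqoff(unsigned int nr)
{
        pending |= 1UL << nr;    /* only mark the softirq pending */
}

static void model_raise_softirq_irqoff(unsigned int nr)
{
        model__raise_softirq_irqoff(nr);
        if (!in_hardirq)         /* flush_smp_call_function_queue() case */
                wakeup_ksoftirqd();
}

int main(void)
{
        in_hardirq = false;      /* SMP function flushed from the idle loop */

        model_raise_softirq_irqoff(SCHED_SOFTIRQ);   /* before: spurious wakeup */
        model__raise_softirq_irqoff(SCHED_SOFTIRQ);  /* after: pending bit only */

        printf("pending softirq mask: 0x%lx\n", pending);
        return 0;
}
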
diff --git a/queue-6.6/sched-core-remove-the-unnecessary-need_resched-check.patch b/queue-6.6/sched-core-remove-the-unnecessary-need_resched-check.patch
new file mode 100644 (file)
index 0000000..d5be741
--- /dev/null
@@ -0,0 +1,122 @@
+From 76c0bd30b8f57a72e9d513123a9cdcbda4772822 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Nov 2024 05:44:30 +0000
+Subject: sched/core: Remove the unnecessary need_resched() check in
+ nohz_csd_func()
+
+From: K Prateek Nayak <kprateek.nayak@amd.com>
+
+[ Upstream commit ea9cffc0a154124821531991d5afdd7e8b20d7aa ]
+
+The need_resched() check currently in nohz_csd_func() can be traced
+back to its addition in scheduler_ipi() in 2011 via commit
+ca38062e57e9 ("sched: Use resched IPI to kick off the nohz idle balance").
+
+Since then, it has travelled quite a bit, but it seems like an idle_cpu()
+check is currently sufficient to detect the need to bail out of an
+idle load balance. To justify this removal, consider all the following
+cases where an idle load balance could race with a task wakeup:
+
+o Since commit f3dd3f674555b ("sched: Remove the limitation of WF_ON_CPU
+  on wakelist if wakee cpu is idle") a target perceived to be idle
+  (target_rq->nr_running == 0) will return true for
+  ttwu_queue_cond(target) which will offload the task wakeup to the idle
+  target via an IPI.
+
+  In all such cases target_rq->ttwu_pending will be set to 1 before
+  queuing the wake function.
+
+  If an idle load balance races here, following scenarios are possible:
+
+  - The CPU is not in TIF_POLLING_NRFLAG mode in which case an actual
+    IPI is sent to the CPU to wake it out of idle. If the
+    nohz_csd_func() queues before sched_ttwu_pending(), the idle load
+    balance will bail out since idle_cpu(target) returns 0 since
+    target_rq->ttwu_pending is 1. If the nohz_csd_func() is queued after
+    sched_ttwu_pending() it should see rq->nr_running to be non-zero and
+    bail out of idle load balancing.
+
+  - The CPU is in TIF_POLLING_NRFLAG mode and instead of an actual IPI,
+    the sender will simply set TIF_NEED_RESCHED for the target to put it
+    out of idle and flush_smp_call_function_queue() in do_idle() will
+    execute the call function. Depending on the ordering of the queuing
+    of nohz_csd_func() and sched_ttwu_pending(), the idle_cpu() check in
+    nohz_csd_func() should either see target_rq->ttwu_pending = 1 or
+    target_rq->nr_running to be non-zero if there is a genuine task
+    wakeup racing with the idle load balance kick.
+
+o The waker CPU perceives the target CPU to be busy
+  (target_rq->nr_running != 0) but the CPU is in fact going idle and due
+  to a series of unfortunate events, the system reaches a case where the
+  waker CPU decides to perform the wakeup by itself in ttwu_queue() on
+  the target CPU but target is concurrently selected for idle load
+  balance (XXX: Can this happen? I'm not sure, but we'll consider the
+  mother of all coincidences to estimate the worst case scenario).
+
+  ttwu_do_activate() calls enqueue_task() which would increment
+  "rq->nr_running" post which it calls wakeup_preempt() which is
+  responsible for setting TIF_NEED_RESCHED (via a resched IPI or by
+  setting TIF_NEED_RESCHED on a TIF_POLLING_NRFLAG idle CPU) The key
+  thing to note in this case is that rq->nr_running is already non-zero
+  in case of a wakeup before TIF_NEED_RESCHED is set which would
+  lead to idle_cpu() check returning false.
+
+In all cases, it seems that need_resched() check is unnecessary when
+checking for idle_cpu() first since an impending wakeup racing with idle
+load balancer will either set the "rq->ttwu_pending" or indicate a newly
+woken task via "rq->nr_running".
+
+Chasing the reason why this check might have existed in the first place,
+I came across Peter's suggestion on the first iteration of Suresh's
+patch from 2011 [1] where the condition to raise the SCHED_SOFTIRQ was:
+
+       sched_ttwu_do_pending(list);
+
+       if (unlikely((rq->idle == current) &&
+           rq->nohz_balance_kick &&
+           !need_resched()))
+               raise_softirq_irqoff(SCHED_SOFTIRQ);
+
+Since the condition to raise the SCHED_SOFTIRQ was preceded by
+sched_ttwu_do_pending() (which is the equivalent of sched_ttwu_pending()) in
+the current upstream kernel, the need_resched() check was necessary to
+catch a newly queued task. Peter suggested modifying it to:
+
+       if (idle_cpu() && rq->nohz_balance_kick && !need_resched())
+               raise_softirq_irqoff(SCHED_SOFTIRQ);
+
+where idle_cpu() seems to have replaced the "rq->idle == current" check.
+
+Even back then, the idle_cpu() check would have been sufficient to catch
+a new task being enqueued. Since commit b2a02fc43a1f ("smp: Optimize
+send_call_function_single_ipi()") overloads the interpretation of
+TIF_NEED_RESCHED for TIF_POLLING_NRFLAG idling, remove the
+need_resched() check in nohz_csd_func() to raise SCHED_SOFTIRQ based
+on Peter's suggestion.
+
+Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()")
+Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20241119054432.6405-3-kprateek.nayak@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index b6f922a20f83a..8abd1bf31864e 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1181,7 +1181,7 @@ static void nohz_csd_func(void *info)
+       WARN_ON(!(flags & NOHZ_KICK_MASK));
+       rq->idle_balance = idle_cpu(cpu);
+-      if (rq->idle_balance && !need_resched()) {
++      if (rq->idle_balance) {
+               rq->nohz_idle_balance = flags;
+               raise_softirq_irqoff(SCHED_SOFTIRQ);
+       }
+-- 
+2.43.0
+
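For illustration, a userspace model of why the idle_cpu() test alone catches both races described above: the remote-wakeup path sets rq->ttwu_pending before queueing the wake function, and a local wakeup bumps rq->nr_running before TIF_NEED_RESCHED is set, so either field flips idle_cpu() to false. The struct below is an invented stand-in for the runqueue fields idle_cpu() consults, not kernel code.

#include <stdbool.h>
#include <stdio.h>

struct rq_model {                 /* invented stand-in for struct rq */
        bool curr_is_idle;        /* rq->curr == rq->idle            */
        unsigned int nr_running;  /* runnable tasks on this CPU      */
        bool ttwu_pending;        /* remote wakeup queued via IPI    */
};

static bool model_idle_cpu(const struct rq_model *rq)
{
        return rq->curr_is_idle && rq->nr_running == 0 && !rq->ttwu_pending;
}

int main(void)
{
        struct rq_model rq = { .curr_is_idle = true };

        printf("truly idle:             idle_cpu=%d\n", model_idle_cpu(&rq));

        rq.ttwu_pending = true;   /* case 1: wakeup offloaded via ttwu_queue IPI */
        printf("remote wakeup pending:  idle_cpu=%d -> bail out\n",
               model_idle_cpu(&rq));

        rq.ttwu_pending = false;
        rq.nr_running = 1;        /* case 2: waker enqueued the task directly */
        printf("task already enqueued:  idle_cpu=%d -> bail out\n",
               model_idle_cpu(&rq));
        return 0;
}
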
diff --git a/queue-6.6/sched-deadline-collect-sched_dl_entity-initializatio.patch b/queue-6.6/sched-deadline-collect-sched_dl_entity-initializatio.patch
new file mode 100644 (file)
index 0000000..013baf5
--- /dev/null
@@ -0,0 +1,143 @@
+From 775f0261940f64fcd57eeac0950684c3fe73b2af Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 4 Nov 2023 11:59:19 +0100
+Subject: sched/deadline: Collect sched_dl_entity initialization
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 9e07d45c5210f5dd6701c00d55791983db7320fa ]
+
+Create a single function that initializes a sched_dl_entity.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Phil Auld <pauld@redhat.com>
+Reviewed-by: Valentin Schneider <vschneid@redhat.com>
+Link: https://lkml.kernel.org/r/51acc695eecf0a1a2f78f9a044e11ffd9b316bcf.1699095159.git.bristot@kernel.org
+Stable-dep-of: 0664e2c311b9 ("sched/deadline: Fix warning in migrate_enable for boosted tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c     |  5 +----
+ kernel/sched/deadline.c | 22 +++++++++++++++-------
+ kernel/sched/sched.h    |  5 +----
+ 3 files changed, 17 insertions(+), 15 deletions(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 7181e6aae16b4..228f7c07da728 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4513,10 +4513,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
+       memset(&p->stats, 0, sizeof(p->stats));
+ #endif
+-      RB_CLEAR_NODE(&p->dl.rb_node);
+-      init_dl_task_timer(&p->dl);
+-      init_dl_inactive_task_timer(&p->dl);
+-      __dl_clear_params(p);
++      init_dl_entity(&p->dl);
+       INIT_LIST_HEAD(&p->rt.run_list);
+       p->rt.timeout           = 0;
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index 6421d28553576..97b548c343ddd 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -335,6 +335,8 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw)
+       __add_rq_bw(new_bw, &rq->dl);
+ }
++static void __dl_clear_params(struct sched_dl_entity *dl_se);
++
+ /*
+  * The utilization of a task cannot be immediately removed from
+  * the rq active utilization (running_bw) when the task blocks.
+@@ -434,7 +436,7 @@ static void task_non_contending(struct task_struct *p)
+                       raw_spin_lock(&dl_b->lock);
+                       __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
+                       raw_spin_unlock(&dl_b->lock);
+-                      __dl_clear_params(p);
++                      __dl_clear_params(dl_se);
+               }
+               return;
+@@ -1207,7 +1209,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
+       return HRTIMER_NORESTART;
+ }
+-void init_dl_task_timer(struct sched_dl_entity *dl_se)
++static void init_dl_task_timer(struct sched_dl_entity *dl_se)
+ {
+       struct hrtimer *timer = &dl_se->dl_timer;
+@@ -1413,7 +1415,7 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
+               raw_spin_lock(&dl_b->lock);
+               __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
+               raw_spin_unlock(&dl_b->lock);
+-              __dl_clear_params(p);
++              __dl_clear_params(dl_se);
+               goto unlock;
+       }
+@@ -1429,7 +1431,7 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
+       return HRTIMER_NORESTART;
+ }
+-void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
++static void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
+ {
+       struct hrtimer *timer = &dl_se->inactive_timer;
+@@ -2986,10 +2988,8 @@ bool __checkparam_dl(const struct sched_attr *attr)
+ /*
+  * This function clears the sched_dl_entity static params.
+  */
+-void __dl_clear_params(struct task_struct *p)
++static void __dl_clear_params(struct sched_dl_entity *dl_se)
+ {
+-      struct sched_dl_entity *dl_se = &p->dl;
+-
+       dl_se->dl_runtime               = 0;
+       dl_se->dl_deadline              = 0;
+       dl_se->dl_period                = 0;
+@@ -3007,6 +3007,14 @@ void __dl_clear_params(struct task_struct *p)
+ #endif
+ }
++void init_dl_entity(struct sched_dl_entity *dl_se)
++{
++      RB_CLEAR_NODE(&dl_se->rb_node);
++      init_dl_task_timer(dl_se);
++      init_dl_inactive_task_timer(dl_se);
++      __dl_clear_params(dl_se);
++}
++
+ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
+ {
+       struct sched_dl_entity *dl_se = &p->dl;
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 1d586e7576bc2..992ac92d021d2 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -286,8 +286,6 @@ struct rt_bandwidth {
+       unsigned int            rt_period_active;
+ };
+-void __dl_clear_params(struct task_struct *p);
+-
+ static inline int dl_bandwidth_enabled(void)
+ {
+       return sysctl_sched_rt_runtime >= 0;
+@@ -2446,8 +2444,7 @@ extern struct rt_bandwidth def_rt_bandwidth;
+ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
+ extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
+-extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
+-extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
++extern void init_dl_entity(struct sched_dl_entity *dl_se);
+ #define BW_SHIFT              20
+ #define BW_UNIT                       (1 << BW_SHIFT)
+-- 
+2.43.0
+
diff --git a/queue-6.6/sched-deadline-fix-warning-in-migrate_enable-for-boo.patch b/queue-6.6/sched-deadline-fix-warning-in-migrate_enable-for-boo.patch
new file mode 100644 (file)
index 0000000..33464f9
--- /dev/null
@@ -0,0 +1,80 @@
+From f20a5fb26b13eb73e6ca14d5e7afa61b92fd129d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Jul 2024 11:22:47 -0300
+Subject: sched/deadline: Fix warning in migrate_enable for boosted tasks
+
+From: Wander Lairson Costa <wander@redhat.com>
+
+[ Upstream commit 0664e2c311b9fa43b33e3e81429cd0c2d7f9c638 ]
+
+When running the following command:
+
+while true; do
+    stress-ng --cyclic 30 --timeout 30s --minimize --quiet
+done
+
+a warning is eventually triggered:
+
+WARNING: CPU: 43 PID: 2848 at kernel/sched/deadline.c:794
+setup_new_dl_entity+0x13e/0x180
+...
+Call Trace:
+ <TASK>
+ ? show_trace_log_lvl+0x1c4/0x2df
+ ? enqueue_dl_entity+0x631/0x6e0
+ ? setup_new_dl_entity+0x13e/0x180
+ ? __warn+0x7e/0xd0
+ ? report_bug+0x11a/0x1a0
+ ? handle_bug+0x3c/0x70
+ ? exc_invalid_op+0x14/0x70
+ ? asm_exc_invalid_op+0x16/0x20
+ enqueue_dl_entity+0x631/0x6e0
+ enqueue_task_dl+0x7d/0x120
+ __do_set_cpus_allowed+0xe3/0x280
+ __set_cpus_allowed_ptr_locked+0x140/0x1d0
+ __set_cpus_allowed_ptr+0x54/0xa0
+ migrate_enable+0x7e/0x150
+ rt_spin_unlock+0x1c/0x90
+ group_send_sig_info+0xf7/0x1a0
+ ? kill_pid_info+0x1f/0x1d0
+ kill_pid_info+0x78/0x1d0
+ kill_proc_info+0x5b/0x110
+ __x64_sys_kill+0x93/0xc0
+ do_syscall_64+0x5c/0xf0
+ entry_SYSCALL_64_after_hwframe+0x6e/0x76
+ RIP: 0033:0x7f0dab31f92b
+
+This warning occurs because set_cpus_allowed dequeues and enqueues tasks
+with the ENQUEUE_RESTORE flag set. If the task is boosted, the warning
+is triggered. A boosted task already had its parameters set by
+rt_mutex_setprio, and a new call to setup_new_dl_entity is unnecessary,
+hence the WARN_ON call.
+
+Check if we are requeueing a boosted task and avoid calling
+setup_new_dl_entity if that's the case.
+
+Fixes: 295d6d5e3736 ("sched/deadline: Fix switching to -deadline")
+Signed-off-by: Wander Lairson Costa <wander@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Juri Lelli <juri.lelli@redhat.com>
+Link: https://lore.kernel.org/r/20240724142253.27145-2-wander@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/deadline.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index aacd3bf9fa4e7..b9e99bc3b1cf2 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -1674,6 +1674,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
+       } else if (flags & ENQUEUE_REPLENISH) {
+               replenish_dl_entity(dl_se);
+       } else if ((flags & ENQUEUE_RESTORE) &&
++                !is_dl_boosted(dl_se) &&
+                 dl_time_before(dl_se->deadline,
+                                rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
+               setup_new_dl_entity(dl_se);
+-- 
+2.43.0
+
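For illustration, a minimal userspace model of the guard the fix above adds: on an ENQUEUE_RESTORE requeue, a priority-boosted deadline entity keeps the parameters that rt_mutex_setprio() already installed instead of getting a fresh deadline. All types and helpers below are invented stand-ins for the kernel code in the hunk above.

#include <stdbool.h>
#include <stdio.h>

#define ENQUEUE_RESTORE 0x02      /* same value as in kernel/sched/sched.h */

struct dl_entity_model {          /* invented stand-in for sched_dl_entity */
        bool pi_boosted;          /* models is_dl_boosted()                */
        unsigned long long deadline;
};

static void setup_new_dl_entity(struct dl_entity_model *dl,
                                unsigned long long now)
{
        /* upstream WARNs when this runs for a boosted entity */
        dl->deadline = now + 1000000ULL;
        puts("setup_new_dl_entity(): fresh deadline computed");
}

static void enqueue_dl_entity(struct dl_entity_model *dl, int flags,
                              unsigned long long now)
{
        if ((flags & ENQUEUE_RESTORE) &&
            !dl->pi_boosted &&               /* the check the fix adds */
            dl->deadline < now)
                setup_new_dl_entity(dl, now);
        else
                puts("keep parameters installed by rt_mutex_setprio()");
}

int main(void)
{
        /* migrate_enable()-style restore of a boosted, past-deadline task */
        struct dl_entity_model boosted = { .pi_boosted = true, .deadline = 0 };

        enqueue_dl_entity(&boosted, ENQUEUE_RESTORE, 42);
        return 0;
}
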
diff --git a/queue-6.6/sched-deadline-move-bandwidth-accounting-into-en-de-.patch b/queue-6.6/sched-deadline-move-bandwidth-accounting-into-en-de-.patch
new file mode 100644 (file)
index 0000000..25e9f21
--- /dev/null
@@ -0,0 +1,264 @@
+From 5209b4474f86b986548fcc5a9f691e21ae79c575 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 4 Nov 2023 11:59:20 +0100
+Subject: sched/deadline: Move bandwidth accounting into {en,de}queue_dl_entity
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 2f7a0f58948d8231236e2facecc500f1930fb996 ]
+
+In preparation for introducing a !task sched_dl_entity, move the
+bandwidth accounting into {en,de}queue_dl_entity().
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Phil Auld <pauld@redhat.com>
+Reviewed-by: Valentin Schneider <vschneid@redhat.com>
+Link: https://lkml.kernel.org/r/a86dccbbe44e021b8771627e1dae01a69b73466d.1699095159.git.bristot@kernel.org
+Stable-dep-of: 0664e2c311b9 ("sched/deadline: Fix warning in migrate_enable for boosted tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/deadline.c | 130 ++++++++++++++++++++++------------------
+ kernel/sched/sched.h    |   6 ++
+ 2 files changed, 78 insertions(+), 58 deletions(-)
+
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index 97b548c343ddd..aacd3bf9fa4e7 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -391,12 +391,12 @@ static void __dl_clear_params(struct sched_dl_entity *dl_se);
+  * up, and checks if the task is still in the "ACTIVE non contending"
+  * state or not (in the second case, it updates running_bw).
+  */
+-static void task_non_contending(struct task_struct *p)
++static void task_non_contending(struct sched_dl_entity *dl_se)
+ {
+-      struct sched_dl_entity *dl_se = &p->dl;
+       struct hrtimer *timer = &dl_se->inactive_timer;
+       struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+       struct rq *rq = rq_of_dl_rq(dl_rq);
++      struct task_struct *p = dl_task_of(dl_se);
+       s64 zerolag_time;
+       /*
+@@ -428,13 +428,14 @@ static void task_non_contending(struct task_struct *p)
+       if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
+               if (dl_task(p))
+                       sub_running_bw(dl_se, dl_rq);
++
+               if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
+                       struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
+                       if (READ_ONCE(p->__state) == TASK_DEAD)
+-                              sub_rq_bw(&p->dl, &rq->dl);
++                              sub_rq_bw(dl_se, &rq->dl);
+                       raw_spin_lock(&dl_b->lock);
+-                      __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
++                      __dl_sub(dl_b, dl_se->dl_bw, dl_bw_cpus(task_cpu(p)));
+                       raw_spin_unlock(&dl_b->lock);
+                       __dl_clear_params(dl_se);
+               }
+@@ -1627,6 +1628,41 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
+       update_stats_enqueue_dl(dl_rq_of_se(dl_se), dl_se, flags);
++      /*
++       * Check if a constrained deadline task was activated
++       * after the deadline but before the next period.
++       * If that is the case, the task will be throttled and
++       * the replenishment timer will be set to the next period.
++       */
++      if (!dl_se->dl_throttled && !dl_is_implicit(dl_se))
++              dl_check_constrained_dl(dl_se);
++
++      if (flags & (ENQUEUE_RESTORE|ENQUEUE_MIGRATING)) {
++              struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
++
++              add_rq_bw(dl_se, dl_rq);
++              add_running_bw(dl_se, dl_rq);
++      }
++
++      /*
++       * If p is throttled, we do not enqueue it. In fact, if it exhausted
++       * its budget it needs a replenishment and, since it now is on
++       * its rq, the bandwidth timer callback (which clearly has not
++       * run yet) will take care of this.
++       * However, the active utilization does not depend on the fact
++       * that the task is on the runqueue or not (but depends on the
++       * task's state - in GRUB parlance, "inactive" vs "active contending").
++       * In other words, even if a task is throttled its utilization must
++       * be counted in the active utilization; hence, we need to call
++       * add_running_bw().
++       */
++      if (dl_se->dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
++              if (flags & ENQUEUE_WAKEUP)
++                      task_contending(dl_se, flags);
++
++              return;
++      }
++
+       /*
+        * If this is a wakeup or a new instance, the scheduling
+        * parameters of the task might need updating. Otherwise,
+@@ -1646,9 +1682,28 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
+       __enqueue_dl_entity(dl_se);
+ }
+-static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
++static void dequeue_dl_entity(struct sched_dl_entity *dl_se, int flags)
+ {
+       __dequeue_dl_entity(dl_se);
++
++      if (flags & (DEQUEUE_SAVE|DEQUEUE_MIGRATING)) {
++              struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
++
++              sub_running_bw(dl_se, dl_rq);
++              sub_rq_bw(dl_se, dl_rq);
++      }
++
++      /*
++       * This check allows to start the inactive timer (or to immediately
++       * decrease the active utilization, if needed) in two cases:
++       * when the task blocks and when it is terminating
++       * (p->state == TASK_DEAD). We can handle the two cases in the same
++       * way, because from GRUB's point of view the same thing is happening
++       * (the task moves from "active contending" to "active non contending"
++       * or "inactive")
++       */
++      if (flags & DEQUEUE_SLEEP)
++              task_non_contending(dl_se);
+ }
+ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
+@@ -1693,76 +1748,35 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
+               return;
+       }
+-      /*
+-       * Check if a constrained deadline task was activated
+-       * after the deadline but before the next period.
+-       * If that is the case, the task will be throttled and
+-       * the replenishment timer will be set to the next period.
+-       */
+-      if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl))
+-              dl_check_constrained_dl(&p->dl);
+-
+-      if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
+-              add_rq_bw(&p->dl, &rq->dl);
+-              add_running_bw(&p->dl, &rq->dl);
+-      }
+-
+-      /*
+-       * If p is throttled, we do not enqueue it. In fact, if it exhausted
+-       * its budget it needs a replenishment and, since it now is on
+-       * its rq, the bandwidth timer callback (which clearly has not
+-       * run yet) will take care of this.
+-       * However, the active utilization does not depend on the fact
+-       * that the task is on the runqueue or not (but depends on the
+-       * task's state - in GRUB parlance, "inactive" vs "active contending").
+-       * In other words, even if a task is throttled its utilization must
+-       * be counted in the active utilization; hence, we need to call
+-       * add_running_bw().
+-       */
+-      if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
+-              if (flags & ENQUEUE_WAKEUP)
+-                      task_contending(&p->dl, flags);
+-
+-              return;
+-      }
+-
+       check_schedstat_required();
+       update_stats_wait_start_dl(dl_rq_of_se(&p->dl), &p->dl);
++      if (p->on_rq == TASK_ON_RQ_MIGRATING)
++              flags |= ENQUEUE_MIGRATING;
++
+       enqueue_dl_entity(&p->dl, flags);
+-      if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
++      if (!task_current(rq, p) && !p->dl.dl_throttled && p->nr_cpus_allowed > 1)
+               enqueue_pushable_dl_task(rq, p);
+ }
+ static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
+ {
+       update_stats_dequeue_dl(&rq->dl, &p->dl, flags);
+-      dequeue_dl_entity(&p->dl);
+-      dequeue_pushable_dl_task(rq, p);
++      dequeue_dl_entity(&p->dl, flags);
++
++      if (!p->dl.dl_throttled)
++              dequeue_pushable_dl_task(rq, p);
+ }
+ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
+ {
+       update_curr_dl(rq);
+-      __dequeue_task_dl(rq, p, flags);
+-      if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
+-              sub_running_bw(&p->dl, &rq->dl);
+-              sub_rq_bw(&p->dl, &rq->dl);
+-      }
++      if (p->on_rq == TASK_ON_RQ_MIGRATING)
++              flags |= DEQUEUE_MIGRATING;
+-      /*
+-       * This check allows to start the inactive timer (or to immediately
+-       * decrease the active utilization, if needed) in two cases:
+-       * when the task blocks and when it is terminating
+-       * (p->state == TASK_DEAD). We can handle the two cases in the same
+-       * way, because from GRUB's point of view the same thing is happening
+-       * (the task moves from "active contending" to "active non contending"
+-       * or "inactive")
+-       */
+-      if (flags & DEQUEUE_SLEEP)
+-              task_non_contending(p);
++      __dequeue_task_dl(rq, p, flags);
+ }
+ /*
+@@ -2580,7 +2594,7 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
+        * will reset the task parameters.
+        */
+       if (task_on_rq_queued(p) && p->dl.dl_runtime)
+-              task_non_contending(p);
++              task_non_contending(&p->dl);
+       /*
+        * In case a task is setscheduled out from SCHED_DEADLINE we need to
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 992ac92d021d2..d48c6a292a83d 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -2196,6 +2196,10 @@ extern const u32                sched_prio_to_wmult[40];
+  * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
+  *        in the runqueue.
+  *
++ * NOCLOCK - skip the update_rq_clock() (avoids double updates)
++ *
++ * MIGRATION - p->on_rq == TASK_ON_RQ_MIGRATING (used for DEADLINE)
++ *
+  * ENQUEUE_HEAD      - place at front of runqueue (tail if not specified)
+  * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
+  * ENQUEUE_MIGRATED  - the task was migrated during wakeup
+@@ -2206,6 +2210,7 @@ extern const u32         sched_prio_to_wmult[40];
+ #define DEQUEUE_SAVE          0x02 /* Matches ENQUEUE_RESTORE */
+ #define DEQUEUE_MOVE          0x04 /* Matches ENQUEUE_MOVE */
+ #define DEQUEUE_NOCLOCK               0x08 /* Matches ENQUEUE_NOCLOCK */
++#define DEQUEUE_MIGRATING     0x100 /* Matches ENQUEUE_MIGRATING */
+ #define ENQUEUE_WAKEUP                0x01
+ #define ENQUEUE_RESTORE               0x02
+@@ -2220,6 +2225,7 @@ extern const u32         sched_prio_to_wmult[40];
+ #define ENQUEUE_MIGRATED      0x00
+ #endif
+ #define ENQUEUE_INITIAL               0x80
++#define ENQUEUE_MIGRATING     0x100
+ #define RETRY_TASK            ((void *)-1UL)
+-- 
+2.43.0
+
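For illustration, a small standalone program showing the flag-pairing convention the patch above extends: each DEQUEUE_* flag that "Matches" an ENQUEUE_* flag uses the same bit, so one flags word can describe a dequeue/enqueue cycle and the deadline class can keep its bandwidth accounting inside {en,de}queue_dl_entity(). The values are copied from the sched.h hunk above; the program itself is only a sketch.

#include <assert.h>
#include <stdio.h>

#define DEQUEUE_SAVE       0x02   /* Matches ENQUEUE_RESTORE   */
#define DEQUEUE_MIGRATING  0x100  /* Matches ENQUEUE_MIGRATING */

#define ENQUEUE_RESTORE    0x02
#define ENQUEUE_MIGRATING  0x100

static_assert(DEQUEUE_SAVE == ENQUEUE_RESTORE, "paired flags share a bit");
static_assert(DEQUEUE_MIGRATING == ENQUEUE_MIGRATING, "paired flags share a bit");

int main(void)
{
        int flags = DEQUEUE_SAVE;   /* e.g. a __do_set_cpus_allowed() cycle */

        /* dequeue side: SAVE/MIGRATING means the bandwidth follows the task */
        if (flags & (DEQUEUE_SAVE | DEQUEUE_MIGRATING))
                puts("dequeue_dl_entity(): sub_running_bw() + sub_rq_bw()");

        /* ... and the very same word, reused on enqueue, restores it */
        if (flags & (ENQUEUE_RESTORE | ENQUEUE_MIGRATING))
                puts("enqueue_dl_entity(): add_rq_bw() + add_running_bw()");

        return 0;
}
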
diff --git a/queue-6.6/sched-fair-check-idle_cpu-before-need_resched-to-det.patch b/queue-6.6/sched-fair-check-idle_cpu-before-need_resched-to-det.patch
new file mode 100644 (file)
index 0000000..b058dc9
--- /dev/null
@@ -0,0 +1,60 @@
+From c864ba7c37d6522e910bcf3575fcd9b81d86e0e6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Nov 2024 05:44:31 +0000
+Subject: sched/fair: Check idle_cpu() before need_resched() to detect ilb CPU
+ turning busy
+
+From: K Prateek Nayak <kprateek.nayak@amd.com>
+
+[ Upstream commit ff47a0acfcce309cf9e175149c75614491953c8f ]
+
+Commit b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()")
+optimizes IPIs to idle CPUs in TIF_POLLING_NRFLAG mode by setting the
+TIF_NEED_RESCHED flag in idle task's thread info and relying on
+flush_smp_call_function_queue() in idle exit path to run the
+call-function. A softirq raised by the call-function is handled shortly
+after in do_softirq_post_smp_call_flush() but the TIF_NEED_RESCHED flag
+remains set and is only cleared later when schedule_idle() calls
+__schedule().
+
+The need_resched() check in _nohz_idle_balance() exists to bail out of load
+balancing if another task has woken up on the CPU currently in charge of
+idle load balancing, which is being processed in SCHED_SOFTIRQ context.
+Since the optimization mentioned above overloads the interpretation of
+TIF_NEED_RESCHED, check for idle_cpu() before going with the existing
+need_resched() check which can catch a genuine task wakeup on an idle
+CPU processing SCHED_SOFTIRQ from do_softirq_post_smp_call_flush(), as
+well as the case where ksoftirqd needs to be preempted as a result of
+new task wakeup or slice expiry.
+
+In case of PREEMPT_RT or threadirqs, although the idle load balancing
+may be inhibited in some cases on the ilb CPU, the fact that ksoftirqd
+is the only fair task going back to sleep will trigger a newidle balance
+on the CPU, which will alleviate some of the imbalance, if any exists,
+should the idle load balance fail to do so.
+
+Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()")
+Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20241119054432.6405-4-kprateek.nayak@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index ddab19e5bd637..d1a67776ecb5d 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -12121,7 +12121,7 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags)
+                * work being done for other CPUs. Next load
+                * balancing owner will pick it up.
+                */
+-              if (need_resched()) {
++              if (!idle_cpu(this_cpu) && need_resched()) {
+                       if (flags & NOHZ_STATS_KICK)
+                               has_blocked_load = true;
+                       if (flags & NOHZ_NEXT_KICK)
+-- 
+2.43.0
+
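For illustration, a tiny standalone sketch of the patched bail-out condition in _nohz_idle_balance(): need_resched() on a still-idle CPU is treated as the leftover TIF_NEED_RESCHED of the IPI optimization and ignored, while need_resched() on a CPU that is no longer idle aborts the pass. The helper name below is invented.

#include <stdbool.h>
#include <stdio.h>

/* the patched test: "!idle_cpu(this_cpu) && need_resched()" */
static bool should_abort_ilb(bool cpu_is_idle, bool need_resched)
{
        return !cpu_is_idle && need_resched;
}

int main(void)
{
        /* leftover TIF_NEED_RESCHED from the smp-call IPI optimization:
         * the CPU is still idle, so keep doing the idle load balance */
        printf("idle CPU, need_resched set -> abort=%d\n",
               should_abort_ilb(true, true));

        /* genuine wakeup (or ksoftirqd must be preempted): abort and let
         * the next balancing owner pick up the remaining work */
        printf("busy CPU, need_resched set -> abort=%d\n",
               should_abort_ilb(false, true));

        return 0;
}
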
diff --git a/queue-6.6/sched-fair-rename-check_preempt_curr-to-wakeup_preem.patch b/queue-6.6/sched-fair-rename-check_preempt_curr-to-wakeup_preem.patch
new file mode 100644 (file)
index 0000000..d7a5a6c
--- /dev/null
@@ -0,0 +1,288 @@
+From 878a41de1e8673830ab904e5b87c4a68c3a29c34 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Sep 2023 10:38:21 +0200
+Subject: sched/fair: Rename check_preempt_curr() to wakeup_preempt()
+
+From: Ingo Molnar <mingo@kernel.org>
+
+[ Upstream commit e23edc86b09df655bf8963bbcb16647adc787395 ]
+
+The name is a bit opaque - make it clear that this is about wakeup
+preemption.
+
+Also rename the ->check_preempt_curr() methods similarly.
+
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Stable-dep-of: 0664e2c311b9 ("sched/deadline: Fix warning in migrate_enable for boosted tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c      | 14 +++++++-------
+ kernel/sched/deadline.c  | 10 +++++-----
+ kernel/sched/fair.c      | 10 +++++-----
+ kernel/sched/idle.c      |  4 ++--
+ kernel/sched/rt.c        |  6 +++---
+ kernel/sched/sched.h     |  4 ++--
+ kernel/sched/stop_task.c |  4 ++--
+ 7 files changed, 26 insertions(+), 26 deletions(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index da14c7450156b..7181e6aae16b4 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -2214,10 +2214,10 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
+               p->sched_class->prio_changed(rq, p, oldprio);
+ }
+-void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
++void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags)
+ {
+       if (p->sched_class == rq->curr->sched_class)
+-              rq->curr->sched_class->check_preempt_curr(rq, p, flags);
++              rq->curr->sched_class->wakeup_preempt(rq, p, flags);
+       else if (sched_class_above(p->sched_class, rq->curr->sched_class))
+               resched_curr(rq);
+@@ -2523,7 +2523,7 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
+       rq_lock(rq, rf);
+       WARN_ON_ONCE(task_cpu(p) != new_cpu);
+       activate_task(rq, p, 0);
+-      check_preempt_curr(rq, p, 0);
++      wakeup_preempt(rq, p, 0);
+       return rq;
+ }
+@@ -3409,7 +3409,7 @@ static void __migrate_swap_task(struct task_struct *p, int cpu)
+               deactivate_task(src_rq, p, 0);
+               set_task_cpu(p, cpu);
+               activate_task(dst_rq, p, 0);
+-              check_preempt_curr(dst_rq, p, 0);
++              wakeup_preempt(dst_rq, p, 0);
+               rq_unpin_lock(dst_rq, &drf);
+               rq_unpin_lock(src_rq, &srf);
+@@ -3785,7 +3785,7 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
+       }
+       activate_task(rq, p, en_flags);
+-      check_preempt_curr(rq, p, wake_flags);
++      wakeup_preempt(rq, p, wake_flags);
+       ttwu_do_wakeup(p);
+@@ -3856,7 +3856,7 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)
+                        * it should preempt the task that is current now.
+                        */
+                       update_rq_clock(rq);
+-                      check_preempt_curr(rq, p, wake_flags);
++                      wakeup_preempt(rq, p, wake_flags);
+               }
+               ttwu_do_wakeup(p);
+               ret = 1;
+@@ -4871,7 +4871,7 @@ void wake_up_new_task(struct task_struct *p)
+       activate_task(rq, p, ENQUEUE_NOCLOCK);
+       trace_sched_wakeup_new(p);
+-      check_preempt_curr(rq, p, WF_FORK);
++      wakeup_preempt(rq, p, WF_FORK);
+ #ifdef CONFIG_SMP
+       if (p->sched_class->task_woken) {
+               /*
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index d78f2e8769fb4..36aeaaf9ab090 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -763,7 +763,7 @@ static inline void deadline_queue_pull_task(struct rq *rq)
+ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
+ static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
+-static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags);
++static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p, int flags);
+ static inline void replenish_dl_new_period(struct sched_dl_entity *dl_se,
+                                           struct rq *rq)
+@@ -1175,7 +1175,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
+       enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
+       if (dl_task(rq->curr))
+-              check_preempt_curr_dl(rq, p, 0);
++              wakeup_preempt_dl(rq, p, 0);
+       else
+               resched_curr(rq);
+@@ -1939,7 +1939,7 @@ static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+  * Only called when both the current and waking task are -deadline
+  * tasks.
+  */
+-static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
++static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p,
+                                 int flags)
+ {
+       if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
+@@ -2654,7 +2654,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
+                       deadline_queue_push_tasks(rq);
+ #endif
+               if (dl_task(rq->curr))
+-                      check_preempt_curr_dl(rq, p, 0);
++                      wakeup_preempt_dl(rq, p, 0);
+               else
+                       resched_curr(rq);
+       } else {
+@@ -2723,7 +2723,7 @@ DEFINE_SCHED_CLASS(dl) = {
+       .dequeue_task           = dequeue_task_dl,
+       .yield_task             = yield_task_dl,
+-      .check_preempt_curr     = check_preempt_curr_dl,
++      .wakeup_preempt         = wakeup_preempt_dl,
+       .pick_next_task         = pick_next_task_dl,
+       .put_prev_task          = put_prev_task_dl,
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 685774895bcec..a32d344623716 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -8268,7 +8268,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
+       /*
+        * This is possible from callers such as attach_tasks(), in which we
+-       * unconditionally check_preempt_curr() after an enqueue (which may have
++       * unconditionally wakeup_preempt() after an enqueue (which may have
+        * lead to a throttle).  This both saves work and prevents false
+        * next-buddy nomination below.
+        */
+@@ -9167,7 +9167,7 @@ static void attach_task(struct rq *rq, struct task_struct *p)
+       WARN_ON_ONCE(task_rq(p) != rq);
+       activate_task(rq, p, ENQUEUE_NOCLOCK);
+-      check_preempt_curr(rq, p, 0);
++      wakeup_preempt(rq, p, 0);
+ }
+ /*
+@@ -12641,7 +12641,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
+               if (p->prio > oldprio)
+                       resched_curr(rq);
+       } else
+-              check_preempt_curr(rq, p, 0);
++              wakeup_preempt(rq, p, 0);
+ }
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+@@ -12743,7 +12743,7 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p)
+               if (task_current(rq, p))
+                       resched_curr(rq);
+               else
+-                      check_preempt_curr(rq, p, 0);
++                      wakeup_preempt(rq, p, 0);
+       }
+ }
+@@ -13102,7 +13102,7 @@ DEFINE_SCHED_CLASS(fair) = {
+       .yield_task             = yield_task_fair,
+       .yield_to_task          = yield_to_task_fair,
+-      .check_preempt_curr     = check_preempt_wakeup_fair,
++      .wakeup_preempt         = check_preempt_wakeup_fair,
+       .pick_next_task         = __pick_next_task_fair,
+       .put_prev_task          = put_prev_task_fair,
+diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
+index 5007b25c5bc65..565f8374ddbbf 100644
+--- a/kernel/sched/idle.c
++++ b/kernel/sched/idle.c
+@@ -401,7 +401,7 @@ balance_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+ /*
+  * Idle tasks are unconditionally rescheduled:
+  */
+-static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags)
++static void wakeup_preempt_idle(struct rq *rq, struct task_struct *p, int flags)
+ {
+       resched_curr(rq);
+ }
+@@ -482,7 +482,7 @@ DEFINE_SCHED_CLASS(idle) = {
+       /* dequeue is not valid, we print a debug message there: */
+       .dequeue_task           = dequeue_task_idle,
+-      .check_preempt_curr     = check_preempt_curr_idle,
++      .wakeup_preempt         = wakeup_preempt_idle,
+       .pick_next_task         = pick_next_task_idle,
+       .put_prev_task          = put_prev_task_idle,
+diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
+index 4ac36eb4cdee5..a8c47d8d51bde 100644
+--- a/kernel/sched/rt.c
++++ b/kernel/sched/rt.c
+@@ -957,7 +957,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
+                               /*
+                                * When we're idle and a woken (rt) task is
+-                               * throttled check_preempt_curr() will set
++                               * throttled wakeup_preempt() will set
+                                * skip_update and the time between the wakeup
+                                * and this unthrottle will get accounted as
+                                * 'runtime'.
+@@ -1719,7 +1719,7 @@ static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+ /*
+  * Preempt the current task with a newly woken task if needed:
+  */
+-static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
++static void wakeup_preempt_rt(struct rq *rq, struct task_struct *p, int flags)
+ {
+       if (p->prio < rq->curr->prio) {
+               resched_curr(rq);
+@@ -2710,7 +2710,7 @@ DEFINE_SCHED_CLASS(rt) = {
+       .dequeue_task           = dequeue_task_rt,
+       .yield_task             = yield_task_rt,
+-      .check_preempt_curr     = check_preempt_curr_rt,
++      .wakeup_preempt         = wakeup_preempt_rt,
+       .pick_next_task         = pick_next_task_rt,
+       .put_prev_task          = put_prev_task_rt,
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 8cbbbea7fdbbd..0e289300fe78d 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -2242,7 +2242,7 @@ struct sched_class {
+       void (*yield_task)   (struct rq *rq);
+       bool (*yield_to_task)(struct rq *rq, struct task_struct *p);
+-      void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
++      void (*wakeup_preempt)(struct rq *rq, struct task_struct *p, int flags);
+       struct task_struct *(*pick_next_task)(struct rq *rq);
+@@ -2516,7 +2516,7 @@ static inline void sub_nr_running(struct rq *rq, unsigned count)
+ extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
+ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
+-extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
++extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags);
+ #ifdef CONFIG_PREEMPT_RT
+ #define SCHED_NR_MIGRATE_BREAK 8
+diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
+index 85590599b4d60..6cf7304e6449d 100644
+--- a/kernel/sched/stop_task.c
++++ b/kernel/sched/stop_task.c
+@@ -23,7 +23,7 @@ balance_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+ #endif /* CONFIG_SMP */
+ static void
+-check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
++wakeup_preempt_stop(struct rq *rq, struct task_struct *p, int flags)
+ {
+       /* we're never preempted */
+ }
+@@ -120,7 +120,7 @@ DEFINE_SCHED_CLASS(stop) = {
+       .dequeue_task           = dequeue_task_stop,
+       .yield_task             = yield_task_stop,
+-      .check_preempt_curr     = check_preempt_curr_stop,
++      .wakeup_preempt         = wakeup_preempt_stop,
+       .pick_next_task         = pick_next_task_stop,
+       .put_prev_task          = put_prev_task_stop,
+-- 
+2.43.0
+
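For illustration, a userspace model of the dispatch the renamed wakeup_preempt() performs (see the kernel/sched/core.c hunk above): a wakee in the same scheduling class defers to that class's wakeup_preempt method, while a wakee from a higher class unconditionally reschedules the current task. The class ranking, task types, and names below are invented for the example.

#include <stdio.h>

struct rq;
struct task;

struct sched_class {                         /* invented, trimmed-down model */
        int rank;                            /* higher rank preempts lower   */
        void (*wakeup_preempt)(struct rq *rq, struct task *p, int flags);
};

struct task { const struct sched_class *class; const char *name; };
struct rq   { struct task *curr; };

static void resched_curr(struct rq *rq)
{
        printf("resched_curr(): preempt %s\n", rq->curr->name);
}

static void wakeup_preempt_fair(struct rq *rq, struct task *p, int flags)
{
        (void)flags;
        printf("fair class decides whether %s preempts %s\n",
               p->name, rq->curr->name);
}

static const struct sched_class fair_class = { 1, wakeup_preempt_fair };
static const struct sched_class rt_class   = { 2, NULL };

/* mirrors the dispatch shown in the core.c hunk above */
static void wakeup_preempt(struct rq *rq, struct task *p, int flags)
{
        if (p->class == rq->curr->class)
                rq->curr->class->wakeup_preempt(rq, p, flags);
        else if (p->class->rank > rq->curr->class->rank)
                resched_curr(rq);
        /* a lower class never preempts the current task on wakeup */
}

int main(void)
{
        struct task cfs_curr  = { &fair_class, "cfs-task"  };
        struct task cfs_wakee = { &fair_class, "cfs-wakee" };
        struct task rt_wakee  = { &rt_class,   "rt-task"   };
        struct rq rq = { &cfs_curr };

        wakeup_preempt(&rq, &cfs_wakee, 0);  /* same class: delegate     */
        wakeup_preempt(&rq, &rt_wakee, 0);   /* higher class: reschedule */
        return 0;
}
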
diff --git a/queue-6.6/sched-fair-rename-check_preempt_wakeup-to-check_pree.patch b/queue-6.6/sched-fair-rename-check_preempt_wakeup-to-check_pree.patch
new file mode 100644 (file)
index 0000000..c17d528
--- /dev/null
@@ -0,0 +1,46 @@
+From d296877d5a57fb95178dd0e1caf9497663c1f630 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Sep 2023 10:31:15 +0200
+Subject: sched/fair: Rename check_preempt_wakeup() to
+ check_preempt_wakeup_fair()
+
+From: Ingo Molnar <mingo@kernel.org>
+
+[ Upstream commit 82845683ca6a15fe8c7912c6264bb0e84ec6f5fb ]
+
+Other scheduling classes already postfix their similar methods
+with the class name.
+
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Stable-dep-of: 0664e2c311b9 ("sched/deadline: Fix warning in migrate_enable for boosted tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index d1a67776ecb5d..685774895bcec 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -8255,7 +8255,7 @@ static void set_next_buddy(struct sched_entity *se)
+ /*
+  * Preempt the current task with a newly woken task if needed:
+  */
+-static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
++static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int wake_flags)
+ {
+       struct task_struct *curr = rq->curr;
+       struct sched_entity *se = &curr->se, *pse = &p->se;
+@@ -13102,7 +13102,7 @@ DEFINE_SCHED_CLASS(fair) = {
+       .yield_task             = yield_task_fair,
+       .yield_to_task          = yield_to_task_fair,
+-      .check_preempt_curr     = check_preempt_wakeup,
++      .check_preempt_curr     = check_preempt_wakeup_fair,
+       .pick_next_task         = __pick_next_task_fair,
+       .put_prev_task          = put_prev_task_fair,
+-- 
+2.43.0
+
diff --git a/queue-6.6/sched-headers-move-struct-sched_param-out-of-uapi-to.patch b/queue-6.6/sched-headers-move-struct-sched_param-out-of-uapi-to.patch
new file mode 100644 (file)
index 0000000..bb4e42d
--- /dev/null
@@ -0,0 +1,117 @@
+From a2306ba0f6fc0cff62d3db4ab0a5fcb941d92de8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Aug 2023 20:03:57 -0700
+Subject: sched/headers: Move 'struct sched_param' out of uapi, to work around
+ glibc/musl breakage
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Kir Kolyshkin <kolyshkin@gmail.com>
+
+[ Upstream commit d844fe65f0957024c3e1b0bf2a0615246184d9bc ]
+
+Both glibc and musl define 'struct sched_param' in sched.h, while kernel
+has it in uapi/linux/sched/types.h, making it cumbersome to use
+sched_getattr(2) or sched_setattr(2) from userspace.
+
+For example, something like this:
+
+       #include <sched.h>
+       #include <linux/sched/types.h>
+
+       struct sched_attr sa;
+
+will result in "error: redefinition of 'struct sched_param'" (note the
+code doesn't need sched_param at all -- it needs struct sched_attr
+plus some stuff from sched.h).
+
+The situation is, glibc is not going to provide a wrapper for
+sched_{get,set}attr, thus the need to include linux/sched/types.h
+directly, which leads to the above problem.
+
+Thus, the userspace is left with a few sub-par choices when it wants to
+use e.g. sched_setattr(2), such as maintaining a copy of struct
+sched_attr definition, or using some other ugly tricks.
+
+OTOH, 'struct sched_param' is well known, defined in POSIX, and it won't
+be ever changed (as that would break backward compatibility).
+
+So, while 'struct sched_param' is indeed part of the kernel uapi,
+exposing it the way it's done now creates an issue, and hiding it
+(like this patch does) fixes that issue, hopefully without creating
+another one: common userspace software relies on libc headers, and as
+for "special" software (like libc), it looks like glibc and musl
+do not rely on kernel headers for 'struct sched_param' definition
+(but let's Cc their mailing lists in case it's otherwise).
+
+The alternative to this patch would be to move struct sched_attr to,
+say, linux/sched.h, or linux/sched/attr.h (the new file).
+
+Oh, and here is the previous attempt to fix the issue:
+
+  https://lore.kernel.org/all/20200528135552.GA87103@google.com/
+
+While I support Linus arguments, the issue is still here
+and needs to be fixed.
+
+[ mingo: Linus is right, this shouldn't be needed - but on the other
+         hand I agree that this header is not really helpful to
+        user-space as-is. So let's pretend that
+        <uapi/linux/sched/types.h> is only about sched_attr, and
+        call this commit a workaround for user-space breakage
+        that it in reality is ... Also, remove the Fixes tag. ]
+
+Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20230808030357.1213829-1-kolyshkin@gmail.com
+Stable-dep-of: 0664e2c311b9 ("sched/deadline: Fix warning in migrate_enable for boosted tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sched.h            | 5 ++++-
+ include/uapi/linux/sched/types.h | 4 ----
+ 2 files changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 3d83cc397eac1..323aa1aaaf91e 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -63,7 +63,6 @@ struct robust_list_head;
+ struct root_domain;
+ struct rq;
+ struct sched_attr;
+-struct sched_param;
+ struct seq_file;
+ struct sighand_struct;
+ struct signal_struct;
+@@ -370,6 +369,10 @@ extern struct root_domain def_root_domain;
+ extern struct mutex sched_domains_mutex;
+ #endif
++struct sched_param {
++      int sched_priority;
++};
++
+ struct sched_info {
+ #ifdef CONFIG_SCHED_INFO
+       /* Cumulative counters: */
+diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h
+index f2c4589d4dbfe..90662385689bb 100644
+--- a/include/uapi/linux/sched/types.h
++++ b/include/uapi/linux/sched/types.h
+@@ -4,10 +4,6 @@
+ #include <linux/types.h>
+-struct sched_param {
+-      int sched_priority;
+-};
+-
+ #define SCHED_ATTR_SIZE_VER0  48      /* sizeof first published struct */
+ #define SCHED_ATTR_SIZE_VER1  56      /* add: util_{min,max} */
+-- 
+2.43.0
+
diff --git a/queue-6.6/sched-remove-vruntime-from-trace_sched_stat_runtime.patch b/queue-6.6/sched-remove-vruntime-from-trace_sched_stat_runtime.patch
new file mode 100644 (file)
index 0000000..2017944
--- /dev/null
@@ -0,0 +1,95 @@
+From 13751b8fb97f75c813b7992dba41d320a0f2489a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 6 Nov 2023 13:41:43 +0100
+Subject: sched: Remove vruntime from trace_sched_stat_runtime()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 5fe6ec8f6ab549b6422e41551abb51802bd48bc7 ]
+
+Tracing the runtime delta makes sense, an observer can sum it over
+time. Tracing the absolute vruntime makes less sense: it is
+inconsistent (absolute vs delta), and the vruntime delta can be
+computed from the runtime delta anyway.
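+
+For example (an illustrative sketch, not part of this patch), a trace
+post-processing tool could reconstruct the vruntime delta from the
+traced runtime delta and the task's weight, which is roughly what
+calc_delta_fair() does in the kernel:
+
+	/* ignores the fixed-point rounding done by __calc_delta() */
+	vruntime_delta = runtime_delta * NICE_0_LOAD / weight;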
+
+Removing the vruntime argument also makes the two tracepoint sites
+identical, allowing the code to be unified in a later patch.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Stable-dep-of: 0664e2c311b9 ("sched/deadline: Fix warning in migrate_enable for boosted tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/events/sched.h | 15 ++++++---------
+ kernel/sched/fair.c          |  5 ++---
+ 2 files changed, 8 insertions(+), 12 deletions(-)
+
+diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
+index 010ba1b7cb0ea..bdb1e838954af 100644
+--- a/include/trace/events/sched.h
++++ b/include/trace/events/sched.h
+@@ -493,33 +493,30 @@ DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_blocked,
+  */
+ DECLARE_EVENT_CLASS(sched_stat_runtime,
+-      TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
++      TP_PROTO(struct task_struct *tsk, u64 runtime),
+-      TP_ARGS(tsk, __perf_count(runtime), vruntime),
++      TP_ARGS(tsk, __perf_count(runtime)),
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( u64,   runtime                 )
+-              __field( u64,   vruntime                        )
+       ),
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid            = tsk->pid;
+               __entry->runtime        = runtime;
+-              __entry->vruntime       = vruntime;
+       ),
+-      TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
++      TP_printk("comm=%s pid=%d runtime=%Lu [ns]",
+                       __entry->comm, __entry->pid,
+-                      (unsigned long long)__entry->runtime,
+-                      (unsigned long long)__entry->vruntime)
++                      (unsigned long long)__entry->runtime)
+ );
+ DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
+-           TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
+-           TP_ARGS(tsk, runtime, vruntime));
++           TP_PROTO(struct task_struct *tsk, u64 runtime),
++           TP_ARGS(tsk, runtime));
+ /*
+  * Tracepoint for showing priority inheritance modifying a tasks
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 3e9333466438c..062447861d8e6 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -1185,8 +1185,7 @@ s64 update_curr_common(struct rq *rq)
+       if (unlikely(delta_exec <= 0))
+               return delta_exec;
+-      trace_sched_stat_runtime(curr, delta_exec, 0);
+-
++      trace_sched_stat_runtime(curr, delta_exec);
+       account_group_exec_runtime(curr, delta_exec);
+       cgroup_account_cputime(curr, delta_exec);
+@@ -1215,7 +1214,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
+       if (entity_is_task(curr)) {
+               struct task_struct *curtask = task_of(curr);
+-              trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
++              trace_sched_stat_runtime(curtask, delta_exec);
+               cgroup_account_cputime(curtask, delta_exec);
+               account_group_exec_runtime(curtask, delta_exec);
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.6/sched-unify-more-update_curr.patch b/queue-6.6/sched-unify-more-update_curr.patch
new file mode 100644 (file)
index 0000000..d519912
--- /dev/null
@@ -0,0 +1,72 @@
+From 09e2cbee382d5b6ad440ccacd38906156dcd8720 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 6 Nov 2023 14:04:01 +0100
+Subject: sched: Unify more update_curr*()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit c708a4dc5ab547edc3d6537233ca9e79ea30ce47 ]
+
+Now that trace_sched_stat_runtime() no longer takes a vruntime
+argument, the task specific bits are identical between
+update_curr_common() and update_curr().
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Stable-dep-of: 0664e2c311b9 ("sched/deadline: Fix warning in migrate_enable for boosted tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 24 +++++++++++-------------
+ 1 file changed, 11 insertions(+), 13 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 062447861d8e6..3b2cfdb8d788d 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -1173,6 +1173,13 @@ static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
+       return delta_exec;
+ }
++static inline void update_curr_task(struct task_struct *p, s64 delta_exec)
++{
++      trace_sched_stat_runtime(p, delta_exec);
++      account_group_exec_runtime(p, delta_exec);
++      cgroup_account_cputime(p, delta_exec);
++}
++
+ /*
+  * Used by other classes to account runtime.
+  */
+@@ -1182,12 +1189,8 @@ s64 update_curr_common(struct rq *rq)
+       s64 delta_exec;
+       delta_exec = update_curr_se(rq, &curr->se);
+-      if (unlikely(delta_exec <= 0))
+-              return delta_exec;
+-
+-      trace_sched_stat_runtime(curr, delta_exec);
+-      account_group_exec_runtime(curr, delta_exec);
+-      cgroup_account_cputime(curr, delta_exec);
++      if (likely(delta_exec > 0))
++              update_curr_task(curr, delta_exec);
+       return delta_exec;
+ }
+@@ -1211,13 +1214,8 @@ static void update_curr(struct cfs_rq *cfs_rq)
+       update_deadline(cfs_rq, curr);
+       update_min_vruntime(cfs_rq);
+-      if (entity_is_task(curr)) {
+-              struct task_struct *curtask = task_of(curr);
+-
+-              trace_sched_stat_runtime(curtask, delta_exec);
+-              cgroup_account_cputime(curtask, delta_exec);
+-              account_group_exec_runtime(curtask, delta_exec);
+-      }
++      if (entity_is_task(curr))
++              update_curr_task(task_of(curr), delta_exec);
+       account_cfs_rq_runtime(cfs_rq, delta_exec);
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.6/sched-unify-runtime-accounting-across-classes.patch b/queue-6.6/sched-unify-runtime-accounting-across-classes.patch
new file mode 100644 (file)
index 0000000..872dea7
--- /dev/null
@@ -0,0 +1,253 @@
+From 5a4948ae96cd5dfff1686ea0c1b446f44191e2af Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 4 Nov 2023 11:59:18 +0100
+Subject: sched: Unify runtime accounting across classes
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 5d69eca542ee17c618f9a55da52191d5e28b435f ]
+
+All classes use sched_entity::exec_start to track runtime and have
+copies of the exact same code around to compute runtime.
+
+Collapse all that.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Phil Auld <pauld@redhat.com>
+Reviewed-by: Valentin Schneider <vschneid@redhat.com>
+Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Link: https://lkml.kernel.org/r/54d148a144f26d9559698c4dd82d8859038a7380.1699095159.git.bristot@kernel.org
+Stable-dep-of: 0664e2c311b9 ("sched/deadline: Fix warning in migrate_enable for boosted tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sched.h    |  2 +-
+ kernel/sched/deadline.c  | 15 +++--------
+ kernel/sched/fair.c      | 57 ++++++++++++++++++++++++++++++----------
+ kernel/sched/rt.c        | 15 +++--------
+ kernel/sched/sched.h     | 12 ++-------
+ kernel/sched/stop_task.c | 13 +--------
+ 6 files changed, 53 insertions(+), 61 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 323aa1aaaf91e..4809f27b52017 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -523,7 +523,7 @@ struct sched_statistics {
+       u64                             block_max;
+       s64                             sum_block_runtime;
+-      u64                             exec_max;
++      s64                             exec_max;
+       u64                             slice_max;
+       u64                             nr_migrations_cold;
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index 36aeaaf9ab090..6421d28553576 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -1299,9 +1299,8 @@ static void update_curr_dl(struct rq *rq)
+ {
+       struct task_struct *curr = rq->curr;
+       struct sched_dl_entity *dl_se = &curr->dl;
+-      u64 delta_exec, scaled_delta_exec;
++      s64 delta_exec, scaled_delta_exec;
+       int cpu = cpu_of(rq);
+-      u64 now;
+       if (!dl_task(curr) || !on_dl_rq(dl_se))
+               return;
+@@ -1314,21 +1313,13 @@ static void update_curr_dl(struct rq *rq)
+        * natural solution, but the full ramifications of this
+        * approach need further study.
+        */
+-      now = rq_clock_task(rq);
+-      delta_exec = now - curr->se.exec_start;
+-      if (unlikely((s64)delta_exec <= 0)) {
++      delta_exec = update_curr_common(rq);
++      if (unlikely(delta_exec <= 0)) {
+               if (unlikely(dl_se->dl_yielded))
+                       goto throttle;
+               return;
+       }
+-      schedstat_set(curr->stats.exec_max,
+-                    max(curr->stats.exec_max, delta_exec));
+-
+-      trace_sched_stat_runtime(curr, delta_exec, 0);
+-
+-      update_current_exec_runtime(curr, now, delta_exec);
+-
+       if (dl_entity_is_special(dl_se))
+               return;
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index a32d344623716..3e9333466438c 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -1150,23 +1150,17 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
+ }
+ #endif /* CONFIG_SMP */
+-/*
+- * Update the current task's runtime statistics.
+- */
+-static void update_curr(struct cfs_rq *cfs_rq)
++static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
+ {
+-      struct sched_entity *curr = cfs_rq->curr;
+-      u64 now = rq_clock_task(rq_of(cfs_rq));
+-      u64 delta_exec;
+-
+-      if (unlikely(!curr))
+-              return;
++      u64 now = rq_clock_task(rq);
++      s64 delta_exec;
+       delta_exec = now - curr->exec_start;
+-      if (unlikely((s64)delta_exec <= 0))
+-              return;
++      if (unlikely(delta_exec <= 0))
++              return delta_exec;
+       curr->exec_start = now;
++      curr->sum_exec_runtime += delta_exec;
+       if (schedstat_enabled()) {
+               struct sched_statistics *stats;
+@@ -1176,8 +1170,43 @@ static void update_curr(struct cfs_rq *cfs_rq)
+                               max(delta_exec, stats->exec_max));
+       }
+-      curr->sum_exec_runtime += delta_exec;
+-      schedstat_add(cfs_rq->exec_clock, delta_exec);
++      return delta_exec;
++}
++
++/*
++ * Used by other classes to account runtime.
++ */
++s64 update_curr_common(struct rq *rq)
++{
++      struct task_struct *curr = rq->curr;
++      s64 delta_exec;
++
++      delta_exec = update_curr_se(rq, &curr->se);
++      if (unlikely(delta_exec <= 0))
++              return delta_exec;
++
++      trace_sched_stat_runtime(curr, delta_exec, 0);
++
++      account_group_exec_runtime(curr, delta_exec);
++      cgroup_account_cputime(curr, delta_exec);
++
++      return delta_exec;
++}
++
++/*
++ * Update the current task's runtime statistics.
++ */
++static void update_curr(struct cfs_rq *cfs_rq)
++{
++      struct sched_entity *curr = cfs_rq->curr;
++      s64 delta_exec;
++
++      if (unlikely(!curr))
++              return;
++
++      delta_exec = update_curr_se(rq_of(cfs_rq), curr);
++      if (unlikely(delta_exec <= 0))
++              return;
+       curr->vruntime += calc_delta_fair(delta_exec, curr);
+       update_deadline(cfs_rq, curr);
+diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
+index a8c47d8d51bde..b89223a973168 100644
+--- a/kernel/sched/rt.c
++++ b/kernel/sched/rt.c
+@@ -1050,24 +1050,15 @@ static void update_curr_rt(struct rq *rq)
+ {
+       struct task_struct *curr = rq->curr;
+       struct sched_rt_entity *rt_se = &curr->rt;
+-      u64 delta_exec;
+-      u64 now;
++      s64 delta_exec;
+       if (curr->sched_class != &rt_sched_class)
+               return;
+-      now = rq_clock_task(rq);
+-      delta_exec = now - curr->se.exec_start;
+-      if (unlikely((s64)delta_exec <= 0))
++      delta_exec = update_curr_common(rq);
++      if (unlikely(delta_exec <= 0))
+               return;
+-      schedstat_set(curr->stats.exec_max,
+-                    max(curr->stats.exec_max, delta_exec));
+-
+-      trace_sched_stat_runtime(curr, delta_exec, 0);
+-
+-      update_current_exec_runtime(curr, now, delta_exec);
+-
+       if (!rt_bandwidth_enabled())
+               return;
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 0e289300fe78d..1d586e7576bc2 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -2231,6 +2231,8 @@ struct affinity_context {
+       unsigned int flags;
+ };
++extern s64 update_curr_common(struct rq *rq);
++
+ struct sched_class {
+ #ifdef CONFIG_UCLAMP_TASK
+@@ -3283,16 +3285,6 @@ extern int sched_dynamic_mode(const char *str);
+ extern void sched_dynamic_update(int mode);
+ #endif
+-static inline void update_current_exec_runtime(struct task_struct *curr,
+-                                              u64 now, u64 delta_exec)
+-{
+-      curr->se.sum_exec_runtime += delta_exec;
+-      account_group_exec_runtime(curr, delta_exec);
+-
+-      curr->se.exec_start = now;
+-      cgroup_account_cputime(curr, delta_exec);
+-}
+-
+ #ifdef CONFIG_SCHED_MM_CID
+ #define SCHED_MM_CID_PERIOD_NS        (100ULL * 1000000)      /* 100ms */
+diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
+index 6cf7304e6449d..b1b8fe61c532a 100644
+--- a/kernel/sched/stop_task.c
++++ b/kernel/sched/stop_task.c
+@@ -70,18 +70,7 @@ static void yield_task_stop(struct rq *rq)
+ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
+ {
+-      struct task_struct *curr = rq->curr;
+-      u64 now, delta_exec;
+-
+-      now = rq_clock_task(rq);
+-      delta_exec = now - curr->se.exec_start;
+-      if (unlikely((s64)delta_exec < 0))
+-              delta_exec = 0;
+-
+-      schedstat_set(curr->stats.exec_max,
+-                    max(curr->stats.exec_max, delta_exec));
+-
+-      update_current_exec_runtime(curr, now, delta_exec);
++      update_curr_common(rq);
+ }
+ /*
+-- 
+2.43.0
+
diff --git a/queue-6.6/series b/queue-6.6/series
index 87c3111b75919bf838f09223c7f3ad6e5103b3c6..d0c61f24c78f6602711b3703bd22ada5d05b8177 100644 (file)
@@ -328,3 +328,19 @@ sched-numa-fix-mm-numa_scan_seq-based-unconditional-.patch
 sched-numa-fix-memory-leak-due-to-the-overwritten-vm.patch
 mempolicy-fix-migrate_pages-2-syscall-return-nr_fail.patch
 mm-mempolicy-fix-migrate_to_node-assuming-there-is-a.patch
+sched-core-remove-the-unnecessary-need_resched-check.patch
+sched-fair-check-idle_cpu-before-need_resched-to-det.patch
+sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch
+sched-fair-rename-check_preempt_wakeup-to-check_pree.patch
+sched-fair-rename-check_preempt_curr-to-wakeup_preem.patch
+sched-headers-move-struct-sched_param-out-of-uapi-to.patch
+sched-unify-runtime-accounting-across-classes.patch
+sched-remove-vruntime-from-trace_sched_stat_runtime.patch
+sched-unify-more-update_curr.patch
+sched-deadline-collect-sched_dl_entity-initializatio.patch
+sched-deadline-move-bandwidth-accounting-into-en-de-.patch
+sched-deadline-fix-warning-in-migrate_enable-for-boo.patch
+btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch
+clk-en7523-initialize-num-before-accessing-hws-in-en.patch
+tracing-eprobe-fix-to-release-eprobe-when-failed-to-.patch
+x86-fix-build-regression-with-config_kexec_jump-enab.patch
diff --git a/queue-6.6/tracing-eprobe-fix-to-release-eprobe-when-failed-to-.patch b/queue-6.6/tracing-eprobe-fix-to-release-eprobe-when-failed-to-.patch
new file mode 100644 (file)
index 0000000..6ee175a
--- /dev/null
@@ -0,0 +1,40 @@
+From 51fd9728d77266aed55052263b11152f334cdf7f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Nov 2024 01:47:47 +0900
+Subject: tracing/eprobe: Fix to release eprobe when failed to add dyn_event
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit 494b332064c0ce2f7392fa92632bc50191c1b517 ]
+
+Fix the eprobe event code to unregister the event call and release the
+eprobe when it fails to add the dynamic event.
+
+Link: https://lore.kernel.org/all/173289886698.73724.1959899350183686006.stgit@devnote2/
+
+Fixes: 7491e2c44278 ("tracing: Add a probe that attaches to trace events")
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace_eprobe.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
+index 31bb977670bdf..f1f12ce5efb0a 100644
+--- a/kernel/trace/trace_eprobe.c
++++ b/kernel/trace/trace_eprobe.c
+@@ -966,6 +966,11 @@ static int __trace_eprobe_create(int argc, const char *argv[])
+               goto error;
+       }
+       ret = dyn_event_add(&ep->devent, &ep->tp.event->call);
++      if (ret < 0) {
++              trace_probe_unregister_event_call(&ep->tp);
++              mutex_unlock(&event_mutex);
++              goto error;
++      }
+       mutex_unlock(&event_mutex);
+       return ret;
+ parse_error:
+-- 
+2.43.0
+
diff --git a/queue-6.6/x86-fix-build-regression-with-config_kexec_jump-enab.patch b/queue-6.6/x86-fix-build-regression-with-config_kexec_jump-enab.patch
new file mode 100644 (file)
index 0000000..5df31bc
--- /dev/null
@@ -0,0 +1,50 @@
+From a62c939147c690ad56e6161b3cc45a510ed76f50 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Dec 2024 08:53:32 +0900
+Subject: x86: Fix build regression with CONFIG_KEXEC_JUMP enabled
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+[ Upstream commit aeb68937614f4aeceaaa762bd7f0212ce842b797 ]
+
+Building 6.13-rc12 for x86_64 with gcc 14.2.1 fails with the error:
+
+  ld: vmlinux.o: in function `virtual_mapped':
+  linux/arch/x86/kernel/relocate_kernel_64.S:249:(.text+0x5915b): undefined reference to `saved_context_gdt_desc'
+
+when CONFIG_KEXEC_JUMP is enabled.
+
+This was introduced by commit 07fa619f2a40 ("x86/kexec: Restore GDT on
+return from ::preserve_context kexec") which introduced a use of
+saved_context_gdt_desc without a declaration for it.
+
+Fix that by including asm/asm-offsets.h where saved_context_gdt_desc
+is defined (indirectly in include/generated/asm-offsets.h which
+asm/asm-offsets.h includes).
+
+Fixes: 07fa619f2a40 ("x86/kexec: Restore GDT on return from ::preserve_context kexec")
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Acked-by: Borislav Petkov (AMD) <bp@alien8.de>
+Acked-by: David Woodhouse <dwmw@amazon.co.uk>
+Closes: https://lore.kernel.org/oe-kbuild-all/202411270006.ZyyzpYf8-lkp@intel.com/
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/relocate_kernel_64.S | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
+index 569d5577059db..fb00a9e8b0879 100644
+--- a/arch/x86/kernel/relocate_kernel_64.S
++++ b/arch/x86/kernel/relocate_kernel_64.S
+@@ -11,6 +11,7 @@
+ #include <asm/pgtable_types.h>
+ #include <asm/nospec-branch.h>
+ #include <asm/unwind_hints.h>
++#include <asm/asm-offsets.h>
+ /*
+  * Must be relocatable PIC code callable as a C function, in particular
+-- 
+2.43.0
+