--- /dev/null
+From 1605e10745f6c295c736fb38beb4354cdf7ba85c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Nov 2024 13:33:03 +0000
+Subject: btrfs: fix missing snapshot drew unlock when root is dead during swap
+ activation
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 9c803c474c6c002d8ade68ebe99026cc39c37f85 ]
+
+When activating a swap file we acquire the root's snapshot drew lock and
+then check if the root is dead, failing and returning with -EPERM if it's
+dead but without unlocking the root's snapshot lock. Fix this by adding
+the missing unlock.
+
+Fixes: 60021bd754c6 ("btrfs: prevent subvol with swapfile from being deleted")
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/inode.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index ea19ea75674d2..035815c439498 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -10693,6 +10693,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
+ if (btrfs_root_dead(root)) {
+ spin_unlock(&root->root_item_lock);
+
++ btrfs_drew_write_unlock(&root->snapshot_lock);
+ btrfs_exclop_finish(fs_info);
+ btrfs_warn(fs_info,
+ "cannot activate swapfile because subvolume %llu is being deleted",
+--
+2.43.0
+
--- /dev/null
+From 2ba38c2e1a95aad9e06d890de1806810f99a4cfa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Dec 2024 22:29:15 +0800
+Subject: clk: en7523: Initialize num before accessing hws in
+ en7523_register_clocks()
+
+From: Haoyu Li <lihaoyu499@gmail.com>
+
+[ Upstream commit 52fd1709e41d3a85b48bcfe2404a024ebaf30c3b ]
+
+With the new __counted_by annotation in clk_hw_onecell_data, the "num"
+struct member must be set before accessing the "hws" array. Failing to
+do so will trigger a runtime warning when enabling CONFIG_UBSAN_BOUNDS
+and CONFIG_FORTIFY_SOURCE.
+
+Fixes: f316cdff8d67 ("clk: Annotate struct clk_hw_onecell_data with __counted_by")
+Signed-off-by: Haoyu Li <lihaoyu499@gmail.com>
+Link: https://lore.kernel.org/r/20241203142915.345523-1-lihaoyu499@gmail.com
+Signed-off-by: Stephen Boyd <sboyd@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/clk/clk-en7523.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/clk/clk-en7523.c b/drivers/clk/clk-en7523.c
+index 7914e60f3d6c5..1331b4bacf0b6 100644
+--- a/drivers/clk/clk-en7523.c
++++ b/drivers/clk/clk-en7523.c
+@@ -284,6 +284,8 @@ static void en7523_register_clocks(struct device *dev, struct clk_hw_onecell_dat
+ u32 rate;
+ int i;
+
++ clk_data->num = EN7523_NUM_CLOCKS;
++
+ for (i = 0; i < ARRAY_SIZE(en7523_base_clks); i++) {
+ const struct en_clk_desc *desc = &en7523_base_clks[i];
+
+@@ -302,8 +304,6 @@ static void en7523_register_clocks(struct device *dev, struct clk_hw_onecell_dat
+
+ hw = en7523_register_pcie_clk(dev, np_base);
+ clk_data->hws[EN7523_CLK_PCIE] = hw;
+-
+- clk_data->num = EN7523_NUM_CLOCKS;
+ }
+
+ static int en7523_clk_probe(struct platform_device *pdev)
+--
+2.43.0
+
--- /dev/null
+From 75b03fdc8dab8e64d761c690fa62dcc38e12d891 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Nov 2024 05:44:32 +0000
+Subject: sched/core: Prevent wakeup of ksoftirqd during idle load balance
+
+From: K Prateek Nayak <kprateek.nayak@amd.com>
+
+[ Upstream commit e932c4ab38f072ce5894b2851fea8bc5754bb8e5 ]
+
+Scheduler raises a SCHED_SOFTIRQ to trigger a load balancing event
+from the IPI handler on the idle CPU. If the SMP function is invoked
+from an idle CPU via flush_smp_call_function_queue() then the HARD-IRQ
+flag is not set and raise_softirq_irqoff() needlessly wakes ksoftirqd
+because soft interrupts are handled before ksoftirqd gets on the CPU.
+
+Adding a trace_printk() in nohz_csd_func() at the spot of raising
+SCHED_SOFTIRQ and enabling trace events for sched_switch, sched_wakeup,
+and softirq_entry (for SCHED_SOFTIRQ vector alone) helps observing the
+current behavior:
+
+ <idle>-0 [000] dN.1.: nohz_csd_func: Raising SCHED_SOFTIRQ from nohz_csd_func
+ <idle>-0 [000] dN.4.: sched_wakeup: comm=ksoftirqd/0 pid=16 prio=120 target_cpu=000
+ <idle>-0 [000] .Ns1.: softirq_entry: vec=7 [action=SCHED]
+ <idle>-0 [000] .Ns1.: softirq_exit: vec=7 [action=SCHED]
+ <idle>-0 [000] d..2.: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=ksoftirqd/0 next_pid=16 next_prio=120
+ ksoftirqd/0-16 [000] d..2.: sched_switch: prev_comm=ksoftirqd/0 prev_pid=16 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120
+ ...
+
+Use __raise_softirq_irqoff() to raise the softirq. The SMP function call
+is always invoked on the requested CPU in an interrupt handler. It is
+guaranteed that soft interrupts are handled at the end.
+
+Following are the observations with the changes when enabling the same
+set of events:
+
+ <idle>-0 [000] dN.1.: nohz_csd_func: Raising SCHED_SOFTIRQ for nohz_idle_balance
+ <idle>-0 [000] dN.1.: softirq_raise: vec=7 [action=SCHED]
+ <idle>-0 [000] .Ns1.: softirq_entry: vec=7 [action=SCHED]
+
+No unnecessary ksoftirqd wakeups are seen from idle task's context to
+service the softirq.
+
+Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()")
+Closes: https://lore.kernel.org/lkml/fcf823f-195e-6c9a-eac3-25f870cb35ac@inria.fr/ [1]
+Reported-by: Julia Lawall <julia.lawall@inria.fr>
+Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20241119054432.6405-5-kprateek.nayak@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 8abd1bf31864e..da14c7450156b 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1183,7 +1183,7 @@ static void nohz_csd_func(void *info)
+ rq->idle_balance = idle_cpu(cpu);
+ if (rq->idle_balance) {
+ rq->nohz_idle_balance = flags;
+- raise_softirq_irqoff(SCHED_SOFTIRQ);
++ __raise_softirq_irqoff(SCHED_SOFTIRQ);
+ }
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 76c0bd30b8f57a72e9d513123a9cdcbda4772822 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Nov 2024 05:44:30 +0000
+Subject: sched/core: Remove the unnecessary need_resched() check in
+ nohz_csd_func()
+
+From: K Prateek Nayak <kprateek.nayak@amd.com>
+
+[ Upstream commit ea9cffc0a154124821531991d5afdd7e8b20d7aa ]
+
+The need_resched() check currently in nohz_csd_func() can be tracked
+to have been added in scheduler_ipi() back in 2011 via commit
+ca38062e57e9 ("sched: Use resched IPI to kick off the nohz idle balance")
+
+Since then, it has travelled quite a bit but it seems like an idle_cpu()
+check currently is sufficient to detect the need to bail out from an
+idle load balancing. To justify this removal, consider all the following
+cases where an idle load balancing could race with a task wakeup:
+
+o Since commit f3dd3f674555b ("sched: Remove the limitation of WF_ON_CPU
+ on wakelist if wakee cpu is idle") a target perceived to be idle
+ (target_rq->nr_running == 0) will return true for
+ ttwu_queue_cond(target) which will offload the task wakeup to the idle
+ target via an IPI.
+
+ In all such cases target_rq->ttwu_pending will be set to 1 before
+ queuing the wake function.
+
+ If an idle load balance races here, following scenarios are possible:
+
+ - The CPU is not in TIF_POLLING_NRFLAG mode in which case an actual
+ IPI is sent to the CPU to wake it out of idle. If the
+ nohz_csd_func() queues before sched_ttwu_pending(), the idle load
+ balance will bail out since idle_cpu(target) returns 0 since
+ target_rq->ttwu_pending is 1. If the nohz_csd_func() is queued after
+ sched_ttwu_pending() it should see rq->nr_running to be non-zero and
+ bail out of idle load balancing.
+
+ - The CPU is in TIF_POLLING_NRFLAG mode and instead of an actual IPI,
+ the sender will simply set TIF_NEED_RESCHED for the target to put it
+ out of idle and flush_smp_call_function_queue() in do_idle() will
+ execute the call function. Depending on the ordering of the queuing
+ of nohz_csd_func() and sched_ttwu_pending(), the idle_cpu() check in
+ nohz_csd_func() should either see target_rq->ttwu_pending = 1 or
+ target_rq->nr_running to be non-zero if there is a genuine task
+ wakeup racing with the idle load balance kick.
+
+o The waker CPU perceives the target CPU to be busy
+ (target_rq->nr_running != 0) but the CPU is in fact going idle and due
+ to a series of unfortunate events, the system reaches a case where the
+ waker CPU decides to perform the wakeup by itself in ttwu_queue() on
+ the target CPU but target is concurrently selected for idle load
+ balance (XXX: Can this happen? I'm not sure, but we'll consider the
+ mother of all coincidences to estimate the worst case scenario).
+
+ ttwu_do_activate() calls enqueue_task() which would increment
+ "rq->nr_running" post which it calls wakeup_preempt() which is
+ responsible for setting TIF_NEED_RESCHED (via a resched IPI or by
+ setting TIF_NEED_RESCHED on a TIF_POLLING_NRFLAG idle CPU) The key
+ thing to note in this case is that rq->nr_running is already non-zero
+ in case of a wakeup before TIF_NEED_RESCHED is set which would
+ lead to idle_cpu() check returning false.
+
+In all cases, it seems that need_resched() check is unnecessary when
+checking for idle_cpu() first since an impending wakeup racing with idle
+load balancer will either set the "rq->ttwu_pending" or indicate a newly
+woken task via "rq->nr_running".
+
+Chasing the reason why this check might have existed in the first place,
+I came across Peter's suggestion on the first iteration of Suresh's
+patch from 2011 [1] where the condition to raise the SCHED_SOFTIRQ was:
+
+ sched_ttwu_do_pending(list);
+
+ if (unlikely((rq->idle == current) &&
+ rq->nohz_balance_kick &&
+ !need_resched()))
+ raise_softirq_irqoff(SCHED_SOFTIRQ);
+
+Since the condition to raise the SCHED_SOFTIRQ was preceded by
+sched_ttwu_do_pending() (which is equivalent of sched_ttwu_pending()) in
+the current upstream kernel, the need_resched() check was necessary to
+catch a newly queued task. Peter suggested modifying it to:
+
+ if (idle_cpu() && rq->nohz_balance_kick && !need_resched())
+ raise_softirq_irqoff(SCHED_SOFTIRQ);
+
+where idle_cpu() seems to have replaced "rq->idle == current" check.
+
+Even back then, the idle_cpu() check would have been sufficient to catch
+a new task being enqueued. Since commit b2a02fc43a1f ("smp: Optimize
+send_call_function_single_ipi()") overloads the interpretation of
+TIF_NEED_RESCHED for TIF_POLLING_NRFLAG idling, remove the
+need_resched() check in nohz_csd_func() to raise SCHED_SOFTIRQ based
+on Peter's suggestion.
+
+Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()")
+Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20241119054432.6405-3-kprateek.nayak@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index b6f922a20f83a..8abd1bf31864e 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1181,7 +1181,7 @@ static void nohz_csd_func(void *info)
+ WARN_ON(!(flags & NOHZ_KICK_MASK));
+
+ rq->idle_balance = idle_cpu(cpu);
+- if (rq->idle_balance && !need_resched()) {
++ if (rq->idle_balance) {
+ rq->nohz_idle_balance = flags;
+ raise_softirq_irqoff(SCHED_SOFTIRQ);
+ }
+--
+2.43.0
+
--- /dev/null
+From 775f0261940f64fcd57eeac0950684c3fe73b2af Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 4 Nov 2023 11:59:19 +0100
+Subject: sched/deadline: Collect sched_dl_entity initialization
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 9e07d45c5210f5dd6701c00d55791983db7320fa ]
+
+Create a single function that initializes a sched_dl_entity.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Phil Auld <pauld@redhat.com>
+Reviewed-by: Valentin Schneider <vschneid@redhat.com>
+Link: https://lkml.kernel.org/r/51acc695eecf0a1a2f78f9a044e11ffd9b316bcf.1699095159.git.bristot@kernel.org
+Stable-dep-of: 0664e2c311b9 ("sched/deadline: Fix warning in migrate_enable for boosted tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c | 5 +----
+ kernel/sched/deadline.c | 22 +++++++++++++++-------
+ kernel/sched/sched.h | 5 +----
+ 3 files changed, 17 insertions(+), 15 deletions(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 7181e6aae16b4..228f7c07da728 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4513,10 +4513,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
+ memset(&p->stats, 0, sizeof(p->stats));
+ #endif
+
+- RB_CLEAR_NODE(&p->dl.rb_node);
+- init_dl_task_timer(&p->dl);
+- init_dl_inactive_task_timer(&p->dl);
+- __dl_clear_params(p);
++ init_dl_entity(&p->dl);
+
+ INIT_LIST_HEAD(&p->rt.run_list);
+ p->rt.timeout = 0;
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index 6421d28553576..97b548c343ddd 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -335,6 +335,8 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw)
+ __add_rq_bw(new_bw, &rq->dl);
+ }
+
++static void __dl_clear_params(struct sched_dl_entity *dl_se);
++
+ /*
+ * The utilization of a task cannot be immediately removed from
+ * the rq active utilization (running_bw) when the task blocks.
+@@ -434,7 +436,7 @@ static void task_non_contending(struct task_struct *p)
+ raw_spin_lock(&dl_b->lock);
+ __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
+ raw_spin_unlock(&dl_b->lock);
+- __dl_clear_params(p);
++ __dl_clear_params(dl_se);
+ }
+
+ return;
+@@ -1207,7 +1209,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
+ return HRTIMER_NORESTART;
+ }
+
+-void init_dl_task_timer(struct sched_dl_entity *dl_se)
++static void init_dl_task_timer(struct sched_dl_entity *dl_se)
+ {
+ struct hrtimer *timer = &dl_se->dl_timer;
+
+@@ -1413,7 +1415,7 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
+ raw_spin_lock(&dl_b->lock);
+ __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
+ raw_spin_unlock(&dl_b->lock);
+- __dl_clear_params(p);
++ __dl_clear_params(dl_se);
+
+ goto unlock;
+ }
+@@ -1429,7 +1431,7 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
+ return HRTIMER_NORESTART;
+ }
+
+-void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
++static void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
+ {
+ struct hrtimer *timer = &dl_se->inactive_timer;
+
+@@ -2986,10 +2988,8 @@ bool __checkparam_dl(const struct sched_attr *attr)
+ /*
+ * This function clears the sched_dl_entity static params.
+ */
+-void __dl_clear_params(struct task_struct *p)
++static void __dl_clear_params(struct sched_dl_entity *dl_se)
+ {
+- struct sched_dl_entity *dl_se = &p->dl;
+-
+ dl_se->dl_runtime = 0;
+ dl_se->dl_deadline = 0;
+ dl_se->dl_period = 0;
+@@ -3007,6 +3007,14 @@ void __dl_clear_params(struct task_struct *p)
+ #endif
+ }
+
++void init_dl_entity(struct sched_dl_entity *dl_se)
++{
++ RB_CLEAR_NODE(&dl_se->rb_node);
++ init_dl_task_timer(dl_se);
++ init_dl_inactive_task_timer(dl_se);
++ __dl_clear_params(dl_se);
++}
++
+ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
+ {
+ struct sched_dl_entity *dl_se = &p->dl;
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 1d586e7576bc2..992ac92d021d2 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -286,8 +286,6 @@ struct rt_bandwidth {
+ unsigned int rt_period_active;
+ };
+
+-void __dl_clear_params(struct task_struct *p);
+-
+ static inline int dl_bandwidth_enabled(void)
+ {
+ return sysctl_sched_rt_runtime >= 0;
+@@ -2446,8 +2444,7 @@ extern struct rt_bandwidth def_rt_bandwidth;
+ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
+ extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
+
+-extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
+-extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
++extern void init_dl_entity(struct sched_dl_entity *dl_se);
+
+ #define BW_SHIFT 20
+ #define BW_UNIT (1 << BW_SHIFT)
+--
+2.43.0
+
--- /dev/null
+From f20a5fb26b13eb73e6ca14d5e7afa61b92fd129d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Jul 2024 11:22:47 -0300
+Subject: sched/deadline: Fix warning in migrate_enable for boosted tasks
+
+From: Wander Lairson Costa <wander@redhat.com>
+
+[ Upstream commit 0664e2c311b9fa43b33e3e81429cd0c2d7f9c638 ]
+
+When running the following command:
+
+while true; do
+ stress-ng --cyclic 30 --timeout 30s --minimize --quiet
+done
+
+a warning is eventually triggered:
+
+WARNING: CPU: 43 PID: 2848 at kernel/sched/deadline.c:794
+setup_new_dl_entity+0x13e/0x180
+...
+Call Trace:
+ <TASK>
+ ? show_trace_log_lvl+0x1c4/0x2df
+ ? enqueue_dl_entity+0x631/0x6e0
+ ? setup_new_dl_entity+0x13e/0x180
+ ? __warn+0x7e/0xd0
+ ? report_bug+0x11a/0x1a0
+ ? handle_bug+0x3c/0x70
+ ? exc_invalid_op+0x14/0x70
+ ? asm_exc_invalid_op+0x16/0x20
+ enqueue_dl_entity+0x631/0x6e0
+ enqueue_task_dl+0x7d/0x120
+ __do_set_cpus_allowed+0xe3/0x280
+ __set_cpus_allowed_ptr_locked+0x140/0x1d0
+ __set_cpus_allowed_ptr+0x54/0xa0
+ migrate_enable+0x7e/0x150
+ rt_spin_unlock+0x1c/0x90
+ group_send_sig_info+0xf7/0x1a0
+ ? kill_pid_info+0x1f/0x1d0
+ kill_pid_info+0x78/0x1d0
+ kill_proc_info+0x5b/0x110
+ __x64_sys_kill+0x93/0xc0
+ do_syscall_64+0x5c/0xf0
+ entry_SYSCALL_64_after_hwframe+0x6e/0x76
+ RIP: 0033:0x7f0dab31f92b
+
+This warning occurs because set_cpus_allowed dequeues and enqueues tasks
+with the ENQUEUE_RESTORE flag set. If the task is boosted, the warning
+is triggered. A boosted task already had its parameters set by
+rt_mutex_setprio, and a new call to setup_new_dl_entity is unnecessary,
+hence the WARN_ON call.
+
+Check if we are requeueing a boosted task and avoid calling
+setup_new_dl_entity if that's the case.
+
+Fixes: 295d6d5e3736 ("sched/deadline: Fix switching to -deadline")
+Signed-off-by: Wander Lairson Costa <wander@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Juri Lelli <juri.lelli@redhat.com>
+Link: https://lore.kernel.org/r/20240724142253.27145-2-wander@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/deadline.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index aacd3bf9fa4e7..b9e99bc3b1cf2 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -1674,6 +1674,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
+ } else if (flags & ENQUEUE_REPLENISH) {
+ replenish_dl_entity(dl_se);
+ } else if ((flags & ENQUEUE_RESTORE) &&
++ !is_dl_boosted(dl_se) &&
+ dl_time_before(dl_se->deadline,
+ rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
+ setup_new_dl_entity(dl_se);
+--
+2.43.0
+
--- /dev/null
+From 5209b4474f86b986548fcc5a9f691e21ae79c575 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 4 Nov 2023 11:59:20 +0100
+Subject: sched/deadline: Move bandwidth accounting into {en,de}queue_dl_entity
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 2f7a0f58948d8231236e2facecc500f1930fb996 ]
+
+In preparation for introducing !task sched_dl_entity; move the
+bandwidth accounting into {en,de}queue_dl_entity().
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Phil Auld <pauld@redhat.com>
+Reviewed-by: Valentin Schneider <vschneid@redhat.com>
+Link: https://lkml.kernel.org/r/a86dccbbe44e021b8771627e1dae01a69b73466d.1699095159.git.bristot@kernel.org
+Stable-dep-of: 0664e2c311b9 ("sched/deadline: Fix warning in migrate_enable for boosted tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/deadline.c | 130 ++++++++++++++++++++++------------------
+ kernel/sched/sched.h | 6 ++
+ 2 files changed, 78 insertions(+), 58 deletions(-)
+
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index 97b548c343ddd..aacd3bf9fa4e7 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -391,12 +391,12 @@ static void __dl_clear_params(struct sched_dl_entity *dl_se);
+ * up, and checks if the task is still in the "ACTIVE non contending"
+ * state or not (in the second case, it updates running_bw).
+ */
+-static void task_non_contending(struct task_struct *p)
++static void task_non_contending(struct sched_dl_entity *dl_se)
+ {
+- struct sched_dl_entity *dl_se = &p->dl;
+ struct hrtimer *timer = &dl_se->inactive_timer;
+ struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+ struct rq *rq = rq_of_dl_rq(dl_rq);
++ struct task_struct *p = dl_task_of(dl_se);
+ s64 zerolag_time;
+
+ /*
+@@ -428,13 +428,14 @@ static void task_non_contending(struct task_struct *p)
+ if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
+ if (dl_task(p))
+ sub_running_bw(dl_se, dl_rq);
++
+ if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
+ struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
+
+ if (READ_ONCE(p->__state) == TASK_DEAD)
+- sub_rq_bw(&p->dl, &rq->dl);
++ sub_rq_bw(dl_se, &rq->dl);
+ raw_spin_lock(&dl_b->lock);
+- __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
++ __dl_sub(dl_b, dl_se->dl_bw, dl_bw_cpus(task_cpu(p)));
+ raw_spin_unlock(&dl_b->lock);
+ __dl_clear_params(dl_se);
+ }
+@@ -1627,6 +1628,41 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
+
+ update_stats_enqueue_dl(dl_rq_of_se(dl_se), dl_se, flags);
+
++ /*
++ * Check if a constrained deadline task was activated
++ * after the deadline but before the next period.
++ * If that is the case, the task will be throttled and
++ * the replenishment timer will be set to the next period.
++ */
++ if (!dl_se->dl_throttled && !dl_is_implicit(dl_se))
++ dl_check_constrained_dl(dl_se);
++
++ if (flags & (ENQUEUE_RESTORE|ENQUEUE_MIGRATING)) {
++ struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
++
++ add_rq_bw(dl_se, dl_rq);
++ add_running_bw(dl_se, dl_rq);
++ }
++
++ /*
++ * If p is throttled, we do not enqueue it. In fact, if it exhausted
++ * its budget it needs a replenishment and, since it now is on
++ * its rq, the bandwidth timer callback (which clearly has not
++ * run yet) will take care of this.
++ * However, the active utilization does not depend on the fact
++ * that the task is on the runqueue or not (but depends on the
++ * task's state - in GRUB parlance, "inactive" vs "active contending").
++ * In other words, even if a task is throttled its utilization must
++ * be counted in the active utilization; hence, we need to call
++ * add_running_bw().
++ */
++ if (dl_se->dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
++ if (flags & ENQUEUE_WAKEUP)
++ task_contending(dl_se, flags);
++
++ return;
++ }
++
+ /*
+ * If this is a wakeup or a new instance, the scheduling
+ * parameters of the task might need updating. Otherwise,
+@@ -1646,9 +1682,28 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
+ __enqueue_dl_entity(dl_se);
+ }
+
+-static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
++static void dequeue_dl_entity(struct sched_dl_entity *dl_se, int flags)
+ {
+ __dequeue_dl_entity(dl_se);
++
++ if (flags & (DEQUEUE_SAVE|DEQUEUE_MIGRATING)) {
++ struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
++
++ sub_running_bw(dl_se, dl_rq);
++ sub_rq_bw(dl_se, dl_rq);
++ }
++
++ /*
++ * This check allows to start the inactive timer (or to immediately
++ * decrease the active utilization, if needed) in two cases:
++ * when the task blocks and when it is terminating
++ * (p->state == TASK_DEAD). We can handle the two cases in the same
++ * way, because from GRUB's point of view the same thing is happening
++ * (the task moves from "active contending" to "active non contending"
++ * or "inactive")
++ */
++ if (flags & DEQUEUE_SLEEP)
++ task_non_contending(dl_se);
+ }
+
+ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
+@@ -1693,76 +1748,35 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
+ return;
+ }
+
+- /*
+- * Check if a constrained deadline task was activated
+- * after the deadline but before the next period.
+- * If that is the case, the task will be throttled and
+- * the replenishment timer will be set to the next period.
+- */
+- if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl))
+- dl_check_constrained_dl(&p->dl);
+-
+- if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
+- add_rq_bw(&p->dl, &rq->dl);
+- add_running_bw(&p->dl, &rq->dl);
+- }
+-
+- /*
+- * If p is throttled, we do not enqueue it. In fact, if it exhausted
+- * its budget it needs a replenishment and, since it now is on
+- * its rq, the bandwidth timer callback (which clearly has not
+- * run yet) will take care of this.
+- * However, the active utilization does not depend on the fact
+- * that the task is on the runqueue or not (but depends on the
+- * task's state - in GRUB parlance, "inactive" vs "active contending").
+- * In other words, even if a task is throttled its utilization must
+- * be counted in the active utilization; hence, we need to call
+- * add_running_bw().
+- */
+- if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
+- if (flags & ENQUEUE_WAKEUP)
+- task_contending(&p->dl, flags);
+-
+- return;
+- }
+-
+ check_schedstat_required();
+ update_stats_wait_start_dl(dl_rq_of_se(&p->dl), &p->dl);
+
++ if (p->on_rq == TASK_ON_RQ_MIGRATING)
++ flags |= ENQUEUE_MIGRATING;
++
+ enqueue_dl_entity(&p->dl, flags);
+
+- if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
++ if (!task_current(rq, p) && !p->dl.dl_throttled && p->nr_cpus_allowed > 1)
+ enqueue_pushable_dl_task(rq, p);
+ }
+
+ static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
+ {
+ update_stats_dequeue_dl(&rq->dl, &p->dl, flags);
+- dequeue_dl_entity(&p->dl);
+- dequeue_pushable_dl_task(rq, p);
++ dequeue_dl_entity(&p->dl, flags);
++
++ if (!p->dl.dl_throttled)
++ dequeue_pushable_dl_task(rq, p);
+ }
+
+ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
+ {
+ update_curr_dl(rq);
+- __dequeue_task_dl(rq, p, flags);
+
+- if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
+- sub_running_bw(&p->dl, &rq->dl);
+- sub_rq_bw(&p->dl, &rq->dl);
+- }
++ if (p->on_rq == TASK_ON_RQ_MIGRATING)
++ flags |= DEQUEUE_MIGRATING;
+
+- /*
+- * This check allows to start the inactive timer (or to immediately
+- * decrease the active utilization, if needed) in two cases:
+- * when the task blocks and when it is terminating
+- * (p->state == TASK_DEAD). We can handle the two cases in the same
+- * way, because from GRUB's point of view the same thing is happening
+- * (the task moves from "active contending" to "active non contending"
+- * or "inactive")
+- */
+- if (flags & DEQUEUE_SLEEP)
+- task_non_contending(p);
++ __dequeue_task_dl(rq, p, flags);
+ }
+
+ /*
+@@ -2580,7 +2594,7 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
+ * will reset the task parameters.
+ */
+ if (task_on_rq_queued(p) && p->dl.dl_runtime)
+- task_non_contending(p);
++ task_non_contending(&p->dl);
+
+ /*
+ * In case a task is setscheduled out from SCHED_DEADLINE we need to
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 992ac92d021d2..d48c6a292a83d 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -2196,6 +2196,10 @@ extern const u32 sched_prio_to_wmult[40];
+ * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
+ * in the runqueue.
+ *
++ * NOCLOCK - skip the update_rq_clock() (avoids double updates)
++ *
++ * MIGRATION - p->on_rq == TASK_ON_RQ_MIGRATING (used for DEADLINE)
++ *
+ * ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
+ * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
+ * ENQUEUE_MIGRATED - the task was migrated during wakeup
+@@ -2206,6 +2210,7 @@ extern const u32 sched_prio_to_wmult[40];
+ #define DEQUEUE_SAVE 0x02 /* Matches ENQUEUE_RESTORE */
+ #define DEQUEUE_MOVE 0x04 /* Matches ENQUEUE_MOVE */
+ #define DEQUEUE_NOCLOCK 0x08 /* Matches ENQUEUE_NOCLOCK */
++#define DEQUEUE_MIGRATING 0x100 /* Matches ENQUEUE_MIGRATING */
+
+ #define ENQUEUE_WAKEUP 0x01
+ #define ENQUEUE_RESTORE 0x02
+@@ -2220,6 +2225,7 @@ extern const u32 sched_prio_to_wmult[40];
+ #define ENQUEUE_MIGRATED 0x00
+ #endif
+ #define ENQUEUE_INITIAL 0x80
++#define ENQUEUE_MIGRATING 0x100
+
+ #define RETRY_TASK ((void *)-1UL)
+
+--
+2.43.0
+
--- /dev/null
+From c864ba7c37d6522e910bcf3575fcd9b81d86e0e6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Nov 2024 05:44:31 +0000
+Subject: sched/fair: Check idle_cpu() before need_resched() to detect ilb CPU
+ turning busy
+
+From: K Prateek Nayak <kprateek.nayak@amd.com>
+
+[ Upstream commit ff47a0acfcce309cf9e175149c75614491953c8f ]
+
+Commit b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()")
+optimizes IPIs to idle CPUs in TIF_POLLING_NRFLAG mode by setting the
+TIF_NEED_RESCHED flag in idle task's thread info and relying on
+flush_smp_call_function_queue() in idle exit path to run the
+call-function. A softirq raised by the call-function is handled shortly
+after in do_softirq_post_smp_call_flush() but the TIF_NEED_RESCHED flag
+remains set and is only cleared later when schedule_idle() calls
+__schedule().
+
+need_resched() check in _nohz_idle_balance() exists to bail out of load
+balancing if another task has woken up on the CPU currently in-charge of
+idle load balancing which is being processed in SCHED_SOFTIRQ context.
+Since the optimization mentioned above overloads the interpretation of
+TIF_NEED_RESCHED, check for idle_cpu() before going with the existing
+need_resched() check which can catch a genuine task wakeup on an idle
+CPU processing SCHED_SOFTIRQ from do_softirq_post_smp_call_flush(), as
+well as the case where ksoftirqd needs to be preempted as a result of
+new task wakeup or slice expiry.
+
+In case of PREEMPT_RT or threadirqs, although the idle load balancing
+may be inhibited in some cases on the ilb CPU, the fact that ksoftirqd
+is the only fair task going back to sleep will trigger a newidle balance
+on the CPU which will alleviate some imbalance if it exists if idle
+balance fails to do so.
+
+Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()")
+Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20241119054432.6405-4-kprateek.nayak@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index ddab19e5bd637..d1a67776ecb5d 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -12121,7 +12121,7 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags)
+ * work being done for other CPUs. Next load
+ * balancing owner will pick it up.
+ */
+- if (need_resched()) {
++ if (!idle_cpu(this_cpu) && need_resched()) {
+ if (flags & NOHZ_STATS_KICK)
+ has_blocked_load = true;
+ if (flags & NOHZ_NEXT_KICK)
+--
+2.43.0
+
--- /dev/null
+From 878a41de1e8673830ab904e5b87c4a68c3a29c34 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Sep 2023 10:38:21 +0200
+Subject: sched/fair: Rename check_preempt_curr() to wakeup_preempt()
+
+From: Ingo Molnar <mingo@kernel.org>
+
+[ Upstream commit e23edc86b09df655bf8963bbcb16647adc787395 ]
+
+The name is a bit opaque - make it clear that this is about wakeup
+preemption.
+
+Also rename the ->check_preempt_curr() methods similarly.
+
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Stable-dep-of: 0664e2c311b9 ("sched/deadline: Fix warning in migrate_enable for boosted tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c | 14 +++++++-------
+ kernel/sched/deadline.c | 10 +++++-----
+ kernel/sched/fair.c | 10 +++++-----
+ kernel/sched/idle.c | 4 ++--
+ kernel/sched/rt.c | 6 +++---
+ kernel/sched/sched.h | 4 ++--
+ kernel/sched/stop_task.c | 4 ++--
+ 7 files changed, 26 insertions(+), 26 deletions(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index da14c7450156b..7181e6aae16b4 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -2214,10 +2214,10 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
+ p->sched_class->prio_changed(rq, p, oldprio);
+ }
+
+-void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
++void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags)
+ {
+ if (p->sched_class == rq->curr->sched_class)
+- rq->curr->sched_class->check_preempt_curr(rq, p, flags);
++ rq->curr->sched_class->wakeup_preempt(rq, p, flags);
+ else if (sched_class_above(p->sched_class, rq->curr->sched_class))
+ resched_curr(rq);
+
+@@ -2523,7 +2523,7 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
+ rq_lock(rq, rf);
+ WARN_ON_ONCE(task_cpu(p) != new_cpu);
+ activate_task(rq, p, 0);
+- check_preempt_curr(rq, p, 0);
++ wakeup_preempt(rq, p, 0);
+
+ return rq;
+ }
+@@ -3409,7 +3409,7 @@ static void __migrate_swap_task(struct task_struct *p, int cpu)
+ deactivate_task(src_rq, p, 0);
+ set_task_cpu(p, cpu);
+ activate_task(dst_rq, p, 0);
+- check_preempt_curr(dst_rq, p, 0);
++ wakeup_preempt(dst_rq, p, 0);
+
+ rq_unpin_lock(dst_rq, &drf);
+ rq_unpin_lock(src_rq, &srf);
+@@ -3785,7 +3785,7 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
+ }
+
+ activate_task(rq, p, en_flags);
+- check_preempt_curr(rq, p, wake_flags);
++ wakeup_preempt(rq, p, wake_flags);
+
+ ttwu_do_wakeup(p);
+
+@@ -3856,7 +3856,7 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)
+ * it should preempt the task that is current now.
+ */
+ update_rq_clock(rq);
+- check_preempt_curr(rq, p, wake_flags);
++ wakeup_preempt(rq, p, wake_flags);
+ }
+ ttwu_do_wakeup(p);
+ ret = 1;
+@@ -4871,7 +4871,7 @@ void wake_up_new_task(struct task_struct *p)
+
+ activate_task(rq, p, ENQUEUE_NOCLOCK);
+ trace_sched_wakeup_new(p);
+- check_preempt_curr(rq, p, WF_FORK);
++ wakeup_preempt(rq, p, WF_FORK);
+ #ifdef CONFIG_SMP
+ if (p->sched_class->task_woken) {
+ /*
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index d78f2e8769fb4..36aeaaf9ab090 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -763,7 +763,7 @@ static inline void deadline_queue_pull_task(struct rq *rq)
+
+ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
+ static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
+-static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags);
++static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p, int flags);
+
+ static inline void replenish_dl_new_period(struct sched_dl_entity *dl_se,
+ struct rq *rq)
+@@ -1175,7 +1175,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
+
+ enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
+ if (dl_task(rq->curr))
+- check_preempt_curr_dl(rq, p, 0);
++ wakeup_preempt_dl(rq, p, 0);
+ else
+ resched_curr(rq);
+
+@@ -1939,7 +1939,7 @@ static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+ * Only called when both the current and waking task are -deadline
+ * tasks.
+ */
+-static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
++static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p,
+ int flags)
+ {
+ if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
+@@ -2654,7 +2654,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
+ deadline_queue_push_tasks(rq);
+ #endif
+ if (dl_task(rq->curr))
+- check_preempt_curr_dl(rq, p, 0);
++ wakeup_preempt_dl(rq, p, 0);
+ else
+ resched_curr(rq);
+ } else {
+@@ -2723,7 +2723,7 @@ DEFINE_SCHED_CLASS(dl) = {
+ .dequeue_task = dequeue_task_dl,
+ .yield_task = yield_task_dl,
+
+- .check_preempt_curr = check_preempt_curr_dl,
++ .wakeup_preempt = wakeup_preempt_dl,
+
+ .pick_next_task = pick_next_task_dl,
+ .put_prev_task = put_prev_task_dl,
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 685774895bcec..a32d344623716 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -8268,7 +8268,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
+
+ /*
+ * This is possible from callers such as attach_tasks(), in which we
+- * unconditionally check_preempt_curr() after an enqueue (which may have
++ * unconditionally wakeup_preempt() after an enqueue (which may have
+ * lead to a throttle). This both saves work and prevents false
+ * next-buddy nomination below.
+ */
+@@ -9167,7 +9167,7 @@ static void attach_task(struct rq *rq, struct task_struct *p)
+
+ WARN_ON_ONCE(task_rq(p) != rq);
+ activate_task(rq, p, ENQUEUE_NOCLOCK);
+- check_preempt_curr(rq, p, 0);
++ wakeup_preempt(rq, p, 0);
+ }
+
+ /*
+@@ -12641,7 +12641,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
+ if (p->prio > oldprio)
+ resched_curr(rq);
+ } else
+- check_preempt_curr(rq, p, 0);
++ wakeup_preempt(rq, p, 0);
+ }
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+@@ -12743,7 +12743,7 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p)
+ if (task_current(rq, p))
+ resched_curr(rq);
+ else
+- check_preempt_curr(rq, p, 0);
++ wakeup_preempt(rq, p, 0);
+ }
+ }
+
+@@ -13102,7 +13102,7 @@ DEFINE_SCHED_CLASS(fair) = {
+ .yield_task = yield_task_fair,
+ .yield_to_task = yield_to_task_fair,
+
+- .check_preempt_curr = check_preempt_wakeup_fair,
++ .wakeup_preempt = check_preempt_wakeup_fair,
+
+ .pick_next_task = __pick_next_task_fair,
+ .put_prev_task = put_prev_task_fair,
+diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
+index 5007b25c5bc65..565f8374ddbbf 100644
+--- a/kernel/sched/idle.c
++++ b/kernel/sched/idle.c
+@@ -401,7 +401,7 @@ balance_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+ /*
+ * Idle tasks are unconditionally rescheduled:
+ */
+-static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags)
++static void wakeup_preempt_idle(struct rq *rq, struct task_struct *p, int flags)
+ {
+ resched_curr(rq);
+ }
+@@ -482,7 +482,7 @@ DEFINE_SCHED_CLASS(idle) = {
+ /* dequeue is not valid, we print a debug message there: */
+ .dequeue_task = dequeue_task_idle,
+
+- .check_preempt_curr = check_preempt_curr_idle,
++ .wakeup_preempt = wakeup_preempt_idle,
+
+ .pick_next_task = pick_next_task_idle,
+ .put_prev_task = put_prev_task_idle,
+diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
+index 4ac36eb4cdee5..a8c47d8d51bde 100644
+--- a/kernel/sched/rt.c
++++ b/kernel/sched/rt.c
+@@ -957,7 +957,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
+
+ /*
+ * When we're idle and a woken (rt) task is
+- * throttled check_preempt_curr() will set
++ * throttled wakeup_preempt() will set
+ * skip_update and the time between the wakeup
+ * and this unthrottle will get accounted as
+ * 'runtime'.
+@@ -1719,7 +1719,7 @@ static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+ /*
+ * Preempt the current task with a newly woken task if needed:
+ */
+-static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
++static void wakeup_preempt_rt(struct rq *rq, struct task_struct *p, int flags)
+ {
+ if (p->prio < rq->curr->prio) {
+ resched_curr(rq);
+@@ -2710,7 +2710,7 @@ DEFINE_SCHED_CLASS(rt) = {
+ .dequeue_task = dequeue_task_rt,
+ .yield_task = yield_task_rt,
+
+- .check_preempt_curr = check_preempt_curr_rt,
++ .wakeup_preempt = wakeup_preempt_rt,
+
+ .pick_next_task = pick_next_task_rt,
+ .put_prev_task = put_prev_task_rt,
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 8cbbbea7fdbbd..0e289300fe78d 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -2242,7 +2242,7 @@ struct sched_class {
+ void (*yield_task) (struct rq *rq);
+ bool (*yield_to_task)(struct rq *rq, struct task_struct *p);
+
+- void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
++ void (*wakeup_preempt)(struct rq *rq, struct task_struct *p, int flags);
+
+ struct task_struct *(*pick_next_task)(struct rq *rq);
+
+@@ -2516,7 +2516,7 @@ static inline void sub_nr_running(struct rq *rq, unsigned count)
+ extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
+ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
+
+-extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
++extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags);
+
+ #ifdef CONFIG_PREEMPT_RT
+ #define SCHED_NR_MIGRATE_BREAK 8
+diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
+index 85590599b4d60..6cf7304e6449d 100644
+--- a/kernel/sched/stop_task.c
++++ b/kernel/sched/stop_task.c
+@@ -23,7 +23,7 @@ balance_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+ #endif /* CONFIG_SMP */
+
+ static void
+-check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
++wakeup_preempt_stop(struct rq *rq, struct task_struct *p, int flags)
+ {
+ /* we're never preempted */
+ }
+@@ -120,7 +120,7 @@ DEFINE_SCHED_CLASS(stop) = {
+ .dequeue_task = dequeue_task_stop,
+ .yield_task = yield_task_stop,
+
+- .check_preempt_curr = check_preempt_curr_stop,
++ .wakeup_preempt = wakeup_preempt_stop,
+
+ .pick_next_task = pick_next_task_stop,
+ .put_prev_task = put_prev_task_stop,
+--
+2.43.0
+
--- /dev/null
+From d296877d5a57fb95178dd0e1caf9497663c1f630 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Sep 2023 10:31:15 +0200
+Subject: sched/fair: Rename check_preempt_wakeup() to
+ check_preempt_wakeup_fair()
+
+From: Ingo Molnar <mingo@kernel.org>
+
+[ Upstream commit 82845683ca6a15fe8c7912c6264bb0e84ec6f5fb ]
+
+Other scheduling classes already postfix their similar methods
+with the class name.
+
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Stable-dep-of: 0664e2c311b9 ("sched/deadline: Fix warning in migrate_enable for boosted tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index d1a67776ecb5d..685774895bcec 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -8255,7 +8255,7 @@ static void set_next_buddy(struct sched_entity *se)
+ /*
+ * Preempt the current task with a newly woken task if needed:
+ */
+-static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
++static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int wake_flags)
+ {
+ struct task_struct *curr = rq->curr;
+ struct sched_entity *se = &curr->se, *pse = &p->se;
+@@ -13102,7 +13102,7 @@ DEFINE_SCHED_CLASS(fair) = {
+ .yield_task = yield_task_fair,
+ .yield_to_task = yield_to_task_fair,
+
+- .check_preempt_curr = check_preempt_wakeup,
++ .check_preempt_curr = check_preempt_wakeup_fair,
+
+ .pick_next_task = __pick_next_task_fair,
+ .put_prev_task = put_prev_task_fair,
+--
+2.43.0
+
--- /dev/null
+From a2306ba0f6fc0cff62d3db4ab0a5fcb941d92de8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Aug 2023 20:03:57 -0700
+Subject: sched/headers: Move 'struct sched_param' out of uapi, to work around
+ glibc/musl breakage
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Kir Kolyshkin <kolyshkin@gmail.com>
+
+[ Upstream commit d844fe65f0957024c3e1b0bf2a0615246184d9bc ]
+
+Both glibc and musl define 'struct sched_param' in sched.h, while kernel
+has it in uapi/linux/sched/types.h, making it cumbersome to use
+sched_getattr(2) or sched_setattr(2) from userspace.
+
+For example, something like this:
+
+ #include <sched.h>
+ #include <linux/sched/types.h>
+
+ struct sched_attr sa;
+
+will result in "error: redefinition of ‘struct sched_param’" (note the
+code doesn't need sched_param at all -- it needs struct sched_attr
+plus some stuff from sched.h).
+
+The situation is, glibc is not going to provide a wrapper for
+sched_{get,set}attr, thus the need to include linux/sched/types.h
+directly, which leads to the above problem.
+
+Thus, the userspace is left with a few sub-par choices when it wants to
+use e.g. sched_setattr(2), such as maintaining a copy of struct
+sched_attr definition, or using some other ugly tricks.
+
+OTOH, 'struct sched_param' is well known, defined in POSIX, and it won't
+be ever changed (as that would break backward compatibility).
+
+So, while 'struct sched_param' is indeed part of the kernel uapi,
+exposing it the way it's done now creates an issue, and hiding it
+(like this patch does) fixes that issue, hopefully without creating
+another one: common userspace software relies on libc headers, and as
+for "special" software (like libc), it looks like glibc and musl
+do not rely on kernel headers for 'struct sched_param' definition
+(but let's Cc their mailing lists in case it's otherwise).
+
+The alternative to this patch would be to move struct sched_attr to,
+say, linux/sched.h, or linux/sched/attr.h (the new file).
+
+Oh, and here is the previous attempt to fix the issue:
+
+ https://lore.kernel.org/all/20200528135552.GA87103@google.com/
+
+While I support Linus's arguments, the issue is still here
+and needs to be fixed.
+
+[ mingo: Linus is right, this shouldn't be needed - but on the other
+ hand I agree that this header is not really helpful to
+ user-space as-is. So let's pretend that
+ <uapi/linux/sched/types.h> is only about sched_attr, and
+ call this commit a workaround for user-space breakage
+ that it in reality is ... Also, remove the Fixes tag. ]
+
+Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20230808030357.1213829-1-kolyshkin@gmail.com
+Stable-dep-of: 0664e2c311b9 ("sched/deadline: Fix warning in migrate_enable for boosted tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sched.h | 5 ++++-
+ include/uapi/linux/sched/types.h | 4 ----
+ 2 files changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 3d83cc397eac1..323aa1aaaf91e 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -63,7 +63,6 @@ struct robust_list_head;
+ struct root_domain;
+ struct rq;
+ struct sched_attr;
+-struct sched_param;
+ struct seq_file;
+ struct sighand_struct;
+ struct signal_struct;
+@@ -370,6 +369,10 @@ extern struct root_domain def_root_domain;
+ extern struct mutex sched_domains_mutex;
+ #endif
+
++struct sched_param {
++ int sched_priority;
++};
++
+ struct sched_info {
+ #ifdef CONFIG_SCHED_INFO
+ /* Cumulative counters: */
+diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h
+index f2c4589d4dbfe..90662385689bb 100644
+--- a/include/uapi/linux/sched/types.h
++++ b/include/uapi/linux/sched/types.h
+@@ -4,10 +4,6 @@
+
+ #include <linux/types.h>
+
+-struct sched_param {
+- int sched_priority;
+-};
+-
+ #define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */
+ #define SCHED_ATTR_SIZE_VER1 56 /* add: util_{min,max} */
+
+--
+2.43.0
+
--- /dev/null
+From 13751b8fb97f75c813b7992dba41d320a0f2489a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 6 Nov 2023 13:41:43 +0100
+Subject: sched: Remove vruntime from trace_sched_stat_runtime()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 5fe6ec8f6ab549b6422e41551abb51802bd48bc7 ]
+
+Tracing the runtime delta makes sense, observer can sum over time.
+Tracing the absolute vruntime makes less sense, inconsistent:
+absolute-vs-delta, but also vruntime delta can be computed from
+runtime delta.
+
+Removing the vruntime thing also makes the two tracepoint sites
+identical, allowing to unify the code in a later patch.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Stable-dep-of: 0664e2c311b9 ("sched/deadline: Fix warning in migrate_enable for boosted tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/events/sched.h | 15 ++++++---------
+ kernel/sched/fair.c | 5 ++---
+ 2 files changed, 8 insertions(+), 12 deletions(-)
+
+diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
+index 010ba1b7cb0ea..bdb1e838954af 100644
+--- a/include/trace/events/sched.h
++++ b/include/trace/events/sched.h
+@@ -493,33 +493,30 @@ DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_blocked,
+ */
+ DECLARE_EVENT_CLASS(sched_stat_runtime,
+
+- TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
++ TP_PROTO(struct task_struct *tsk, u64 runtime),
+
+- TP_ARGS(tsk, __perf_count(runtime), vruntime),
++ TP_ARGS(tsk, __perf_count(runtime)),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+ __field( pid_t, pid )
+ __field( u64, runtime )
+- __field( u64, vruntime )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->pid = tsk->pid;
+ __entry->runtime = runtime;
+- __entry->vruntime = vruntime;
+ ),
+
+- TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
++ TP_printk("comm=%s pid=%d runtime=%Lu [ns]",
+ __entry->comm, __entry->pid,
+- (unsigned long long)__entry->runtime,
+- (unsigned long long)__entry->vruntime)
++ (unsigned long long)__entry->runtime)
+ );
+
+ DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
+- TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
+- TP_ARGS(tsk, runtime, vruntime));
++ TP_PROTO(struct task_struct *tsk, u64 runtime),
++ TP_ARGS(tsk, runtime));
+
+ /*
+ * Tracepoint for showing priority inheritance modifying a tasks
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 3e9333466438c..062447861d8e6 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -1185,8 +1185,7 @@ s64 update_curr_common(struct rq *rq)
+ if (unlikely(delta_exec <= 0))
+ return delta_exec;
+
+- trace_sched_stat_runtime(curr, delta_exec, 0);
+-
++ trace_sched_stat_runtime(curr, delta_exec);
+ account_group_exec_runtime(curr, delta_exec);
+ cgroup_account_cputime(curr, delta_exec);
+
+@@ -1215,7 +1214,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
+ if (entity_is_task(curr)) {
+ struct task_struct *curtask = task_of(curr);
+
+- trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
++ trace_sched_stat_runtime(curtask, delta_exec);
+ cgroup_account_cputime(curtask, delta_exec);
+ account_group_exec_runtime(curtask, delta_exec);
+ }
+--
+2.43.0
+
--- /dev/null
+From 09e2cbee382d5b6ad440ccacd38906156dcd8720 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 6 Nov 2023 14:04:01 +0100
+Subject: sched: Unify more update_curr*()
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit c708a4dc5ab547edc3d6537233ca9e79ea30ce47 ]
+
+Now that trace_sched_stat_runtime() no longer takes a vruntime
+argument, the task specific bits are identical between
+update_curr_common() and update_curr().
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Stable-dep-of: 0664e2c311b9 ("sched/deadline: Fix warning in migrate_enable for boosted tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 24 +++++++++++-------------
+ 1 file changed, 11 insertions(+), 13 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 062447861d8e6..3b2cfdb8d788d 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -1173,6 +1173,13 @@ static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
+ return delta_exec;
+ }
+
++static inline void update_curr_task(struct task_struct *p, s64 delta_exec)
++{
++ trace_sched_stat_runtime(p, delta_exec);
++ account_group_exec_runtime(p, delta_exec);
++ cgroup_account_cputime(p, delta_exec);
++}
++
+ /*
+ * Used by other classes to account runtime.
+ */
+@@ -1182,12 +1189,8 @@ s64 update_curr_common(struct rq *rq)
+ s64 delta_exec;
+
+ delta_exec = update_curr_se(rq, &curr->se);
+- if (unlikely(delta_exec <= 0))
+- return delta_exec;
+-
+- trace_sched_stat_runtime(curr, delta_exec);
+- account_group_exec_runtime(curr, delta_exec);
+- cgroup_account_cputime(curr, delta_exec);
++ if (likely(delta_exec > 0))
++ update_curr_task(curr, delta_exec);
+
+ return delta_exec;
+ }
+@@ -1211,13 +1214,8 @@ static void update_curr(struct cfs_rq *cfs_rq)
+ update_deadline(cfs_rq, curr);
+ update_min_vruntime(cfs_rq);
+
+- if (entity_is_task(curr)) {
+- struct task_struct *curtask = task_of(curr);
+-
+- trace_sched_stat_runtime(curtask, delta_exec);
+- cgroup_account_cputime(curtask, delta_exec);
+- account_group_exec_runtime(curtask, delta_exec);
+- }
++ if (entity_is_task(curr))
++ update_curr_task(task_of(curr), delta_exec);
+
+ account_cfs_rq_runtime(cfs_rq, delta_exec);
+ }
+--
+2.43.0
+
--- /dev/null
+From 5a4948ae96cd5dfff1686ea0c1b446f44191e2af Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 4 Nov 2023 11:59:18 +0100
+Subject: sched: Unify runtime accounting across classes
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 5d69eca542ee17c618f9a55da52191d5e28b435f ]
+
+All classes use sched_entity::exec_start to track runtime and have
+copies of the exact same code around to compute runtime.
+
+Collapse all that.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Phil Auld <pauld@redhat.com>
+Reviewed-by: Valentin Schneider <vschneid@redhat.com>
+Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Link: https://lkml.kernel.org/r/54d148a144f26d9559698c4dd82d8859038a7380.1699095159.git.bristot@kernel.org
+Stable-dep-of: 0664e2c311b9 ("sched/deadline: Fix warning in migrate_enable for boosted tasks")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sched.h | 2 +-
+ kernel/sched/deadline.c | 15 +++--------
+ kernel/sched/fair.c | 57 ++++++++++++++++++++++++++++++----------
+ kernel/sched/rt.c | 15 +++--------
+ kernel/sched/sched.h | 12 ++-------
+ kernel/sched/stop_task.c | 13 +--------
+ 6 files changed, 53 insertions(+), 61 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 323aa1aaaf91e..4809f27b52017 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -523,7 +523,7 @@ struct sched_statistics {
+ u64 block_max;
+ s64 sum_block_runtime;
+
+- u64 exec_max;
++ s64 exec_max;
+ u64 slice_max;
+
+ u64 nr_migrations_cold;
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index 36aeaaf9ab090..6421d28553576 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -1299,9 +1299,8 @@ static void update_curr_dl(struct rq *rq)
+ {
+ struct task_struct *curr = rq->curr;
+ struct sched_dl_entity *dl_se = &curr->dl;
+- u64 delta_exec, scaled_delta_exec;
++ s64 delta_exec, scaled_delta_exec;
+ int cpu = cpu_of(rq);
+- u64 now;
+
+ if (!dl_task(curr) || !on_dl_rq(dl_se))
+ return;
+@@ -1314,21 +1313,13 @@ static void update_curr_dl(struct rq *rq)
+ * natural solution, but the full ramifications of this
+ * approach need further study.
+ */
+- now = rq_clock_task(rq);
+- delta_exec = now - curr->se.exec_start;
+- if (unlikely((s64)delta_exec <= 0)) {
++ delta_exec = update_curr_common(rq);
++ if (unlikely(delta_exec <= 0)) {
+ if (unlikely(dl_se->dl_yielded))
+ goto throttle;
+ return;
+ }
+
+- schedstat_set(curr->stats.exec_max,
+- max(curr->stats.exec_max, delta_exec));
+-
+- trace_sched_stat_runtime(curr, delta_exec, 0);
+-
+- update_current_exec_runtime(curr, now, delta_exec);
+-
+ if (dl_entity_is_special(dl_se))
+ return;
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index a32d344623716..3e9333466438c 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -1150,23 +1150,17 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
+ }
+ #endif /* CONFIG_SMP */
+
+-/*
+- * Update the current task's runtime statistics.
+- */
+-static void update_curr(struct cfs_rq *cfs_rq)
++static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
+ {
+- struct sched_entity *curr = cfs_rq->curr;
+- u64 now = rq_clock_task(rq_of(cfs_rq));
+- u64 delta_exec;
+-
+- if (unlikely(!curr))
+- return;
++ u64 now = rq_clock_task(rq);
++ s64 delta_exec;
+
+ delta_exec = now - curr->exec_start;
+- if (unlikely((s64)delta_exec <= 0))
+- return;
++ if (unlikely(delta_exec <= 0))
++ return delta_exec;
+
+ curr->exec_start = now;
++ curr->sum_exec_runtime += delta_exec;
+
+ if (schedstat_enabled()) {
+ struct sched_statistics *stats;
+@@ -1176,8 +1170,43 @@ static void update_curr(struct cfs_rq *cfs_rq)
+ max(delta_exec, stats->exec_max));
+ }
+
+- curr->sum_exec_runtime += delta_exec;
+- schedstat_add(cfs_rq->exec_clock, delta_exec);
++ return delta_exec;
++}
++
++/*
++ * Used by other classes to account runtime.
++ */
++s64 update_curr_common(struct rq *rq)
++{
++ struct task_struct *curr = rq->curr;
++ s64 delta_exec;
++
++ delta_exec = update_curr_se(rq, &curr->se);
++ if (unlikely(delta_exec <= 0))
++ return delta_exec;
++
++ trace_sched_stat_runtime(curr, delta_exec, 0);
++
++ account_group_exec_runtime(curr, delta_exec);
++ cgroup_account_cputime(curr, delta_exec);
++
++ return delta_exec;
++}
++
++/*
++ * Update the current task's runtime statistics.
++ */
++static void update_curr(struct cfs_rq *cfs_rq)
++{
++ struct sched_entity *curr = cfs_rq->curr;
++ s64 delta_exec;
++
++ if (unlikely(!curr))
++ return;
++
++ delta_exec = update_curr_se(rq_of(cfs_rq), curr);
++ if (unlikely(delta_exec <= 0))
++ return;
+
+ curr->vruntime += calc_delta_fair(delta_exec, curr);
+ update_deadline(cfs_rq, curr);
+diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
+index a8c47d8d51bde..b89223a973168 100644
+--- a/kernel/sched/rt.c
++++ b/kernel/sched/rt.c
+@@ -1050,24 +1050,15 @@ static void update_curr_rt(struct rq *rq)
+ {
+ struct task_struct *curr = rq->curr;
+ struct sched_rt_entity *rt_se = &curr->rt;
+- u64 delta_exec;
+- u64 now;
++ s64 delta_exec;
+
+ if (curr->sched_class != &rt_sched_class)
+ return;
+
+- now = rq_clock_task(rq);
+- delta_exec = now - curr->se.exec_start;
+- if (unlikely((s64)delta_exec <= 0))
++ delta_exec = update_curr_common(rq);
++ if (unlikely(delta_exec <= 0))
+ return;
+
+- schedstat_set(curr->stats.exec_max,
+- max(curr->stats.exec_max, delta_exec));
+-
+- trace_sched_stat_runtime(curr, delta_exec, 0);
+-
+- update_current_exec_runtime(curr, now, delta_exec);
+-
+ if (!rt_bandwidth_enabled())
+ return;
+
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 0e289300fe78d..1d586e7576bc2 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -2231,6 +2231,8 @@ struct affinity_context {
+ unsigned int flags;
+ };
+
++extern s64 update_curr_common(struct rq *rq);
++
+ struct sched_class {
+
+ #ifdef CONFIG_UCLAMP_TASK
+@@ -3283,16 +3285,6 @@ extern int sched_dynamic_mode(const char *str);
+ extern void sched_dynamic_update(int mode);
+ #endif
+
+-static inline void update_current_exec_runtime(struct task_struct *curr,
+- u64 now, u64 delta_exec)
+-{
+- curr->se.sum_exec_runtime += delta_exec;
+- account_group_exec_runtime(curr, delta_exec);
+-
+- curr->se.exec_start = now;
+- cgroup_account_cputime(curr, delta_exec);
+-}
+-
+ #ifdef CONFIG_SCHED_MM_CID
+
+ #define SCHED_MM_CID_PERIOD_NS (100ULL * 1000000) /* 100ms */
+diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
+index 6cf7304e6449d..b1b8fe61c532a 100644
+--- a/kernel/sched/stop_task.c
++++ b/kernel/sched/stop_task.c
+@@ -70,18 +70,7 @@ static void yield_task_stop(struct rq *rq)
+
+ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
+ {
+- struct task_struct *curr = rq->curr;
+- u64 now, delta_exec;
+-
+- now = rq_clock_task(rq);
+- delta_exec = now - curr->se.exec_start;
+- if (unlikely((s64)delta_exec < 0))
+- delta_exec = 0;
+-
+- schedstat_set(curr->stats.exec_max,
+- max(curr->stats.exec_max, delta_exec));
+-
+- update_current_exec_runtime(curr, now, delta_exec);
++ update_curr_common(rq);
+ }
+
+ /*
+--
+2.43.0
+
sched-numa-fix-memory-leak-due-to-the-overwritten-vm.patch
mempolicy-fix-migrate_pages-2-syscall-return-nr_fail.patch
mm-mempolicy-fix-migrate_to_node-assuming-there-is-a.patch
+sched-core-remove-the-unnecessary-need_resched-check.patch
+sched-fair-check-idle_cpu-before-need_resched-to-det.patch
+sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch
+sched-fair-rename-check_preempt_wakeup-to-check_pree.patch
+sched-fair-rename-check_preempt_curr-to-wakeup_preem.patch
+sched-headers-move-struct-sched_param-out-of-uapi-to.patch
+sched-unify-runtime-accounting-across-classes.patch
+sched-remove-vruntime-from-trace_sched_stat_runtime.patch
+sched-unify-more-update_curr.patch
+sched-deadline-collect-sched_dl_entity-initializatio.patch
+sched-deadline-move-bandwidth-accounting-into-en-de-.patch
+sched-deadline-fix-warning-in-migrate_enable-for-boo.patch
+btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch
+clk-en7523-initialize-num-before-accessing-hws-in-en.patch
+tracing-eprobe-fix-to-release-eprobe-when-failed-to-.patch
+x86-fix-build-regression-with-config_kexec_jump-enab.patch
--- /dev/null
+From 51fd9728d77266aed55052263b11152f334cdf7f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Nov 2024 01:47:47 +0900
+Subject: tracing/eprobe: Fix to release eprobe when failed to add dyn_event
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit 494b332064c0ce2f7392fa92632bc50191c1b517 ]
+
+Fix eprobe event to unregister event call and release eprobe when it fails
+to add dynamic event correctly.
+
+Link: https://lore.kernel.org/all/173289886698.73724.1959899350183686006.stgit@devnote2/
+
+Fixes: 7491e2c44278 ("tracing: Add a probe that attaches to trace events")
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace_eprobe.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
+index 31bb977670bdf..f1f12ce5efb0a 100644
+--- a/kernel/trace/trace_eprobe.c
++++ b/kernel/trace/trace_eprobe.c
+@@ -966,6 +966,11 @@ static int __trace_eprobe_create(int argc, const char *argv[])
+ goto error;
+ }
+ ret = dyn_event_add(&ep->devent, &ep->tp.event->call);
++ if (ret < 0) {
++ trace_probe_unregister_event_call(&ep->tp);
++ mutex_unlock(&event_mutex);
++ goto error;
++ }
+ mutex_unlock(&event_mutex);
+ return ret;
+ parse_error:
+--
+2.43.0
+
--- /dev/null
+From a62c939147c690ad56e6161b3cc45a510ed76f50 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Dec 2024 08:53:32 +0900
+Subject: x86: Fix build regression with CONFIG_KEXEC_JUMP enabled
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+[ Upstream commit aeb68937614f4aeceaaa762bd7f0212ce842b797 ]
+
+Build 6.13-rc12 for x86_64 with gcc 14.2.1 fails with the error:
+
+ ld: vmlinux.o: in function `virtual_mapped':
+ linux/arch/x86/kernel/relocate_kernel_64.S:249:(.text+0x5915b): undefined reference to `saved_context_gdt_desc'
+
+when CONFIG_KEXEC_JUMP is enabled.
+
+This was introduced by commit 07fa619f2a40 ("x86/kexec: Restore GDT on
+return from ::preserve_context kexec") which introduced a use of
+saved_context_gdt_desc without a declaration for it.
+
+Fix that by including asm/asm-offsets.h where saved_context_gdt_desc
+is defined (indirectly in include/generated/asm-offsets.h which
+asm/asm-offsets.h includes).
+
+Fixes: 07fa619f2a40 ("x86/kexec: Restore GDT on return from ::preserve_context kexec")
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Acked-by: Borislav Petkov (AMD) <bp@alien8.de>
+Acked-by: David Woodhouse <dwmw@amazon.co.uk>
+Closes: https://lore.kernel.org/oe-kbuild-all/202411270006.ZyyzpYf8-lkp@intel.com/
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/relocate_kernel_64.S | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
+index 569d5577059db..fb00a9e8b0879 100644
+--- a/arch/x86/kernel/relocate_kernel_64.S
++++ b/arch/x86/kernel/relocate_kernel_64.S
+@@ -11,6 +11,7 @@
+ #include <asm/pgtable_types.h>
+ #include <asm/nospec-branch.h>
+ #include <asm/unwind_hints.h>
++#include <asm/asm-offsets.h>
+
+ /*
+ * Must be relocatable PIC code callable as a C function, in particular
+--
+2.43.0
+