--- /dev/null
+From 7f87f0b776b9a3722815a1bc1b527e3d1c90f646 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Aug 2025 12:10:28 +0100
+Subject: btrfs: always drop log root tree reference in btrfs_replay_log()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 2f5b8095ea47b142c56c09755a8b1e14145a2d30 ]
+
+Currently we have this odd behaviour:
+
+1) At btrfs_replay_log() we drop the reference of the log root tree if
+ the call to btrfs_recover_log_trees() failed;
+
+2) But if the call to btrfs_recover_log_trees() did not fail, we don't
+ drop the reference in btrfs_replay_log() - we expect that
+ btrfs_recover_log_trees() does it in case it returns success.
+
+Let's simplify this and make btrfs_replay_log() always drop the reference
+on the log root tree. Not only does this simplify the code, it is also
+what makes sense, since it's btrfs_replay_log() that grabbed the reference
+in the first place.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/disk-io.c | 2 +-
+ fs/btrfs/tree-log.c | 1 -
+ 2 files changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 91475cb7d568b..29f0ba4adfbce 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -2309,10 +2309,10 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
+ }
+ /* returns with log_tree_root freed on success */
+ ret = btrfs_recover_log_trees(log_tree_root);
++ btrfs_put_root(log_tree_root);
+ if (ret) {
+ btrfs_handle_fs_error(fs_info, ret,
+ "Failed to recover log tree");
+- btrfs_put_root(log_tree_root);
+ return ret;
+ }
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 6d715bb773643..cdb5a2770faf3 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -6432,7 +6432,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
+
+ log_root_tree->log_root = NULL;
+ clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
+- btrfs_put_root(log_root_tree);
+
+ return 0;
+ error:
+--
+2.51.0
+
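A minimal sketch of the ownership rule the patch above establishes, for
readers skimming the diff: the function that obtained the reference is the
one that drops it, unconditionally. The surrounding function below is
illustrative only, not the actual btrfs_replay_log() body; only
btrfs_recover_log_trees(), btrfs_put_root() and btrfs_handle_fs_error()
are the real helpers named in the diff.

/*
 * Sketch only: the shape of btrfs_replay_log() after the change.  The
 * caller that took the log_tree_root reference drops it exactly once,
 * whether or not log recovery succeeded.
 */
static int replay_log_sketch(struct btrfs_fs_info *fs_info,
			     struct btrfs_root *log_tree_root)
{
	int ret;

	ret = btrfs_recover_log_trees(log_tree_root);
	btrfs_put_root(log_tree_root);	/* always dropped here now */
	if (ret)
		btrfs_handle_fs_error(fs_info, ret,
				      "Failed to recover log tree");
	return ret;
}
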
--- /dev/null
+From 581461587a3316a3c933c4e90962e0a37cad44a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Sep 2025 12:09:14 +0100
+Subject: btrfs: use smp_mb__after_atomic() when forcing COW in
+ create_pending_snapshot()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 45c222468d33202c07c41c113301a4b9c8451b8f ]
+
+After setting the BTRFS_ROOT_FORCE_COW flag on the root we do a full
+write barrier, smp_wmb(), but we don't need to; all we need is
+smp_mb__after_atomic(). The use of smp_wmb() is from the old days when
+we didn't use a bit but instead an int field in the root to signal
+whether COW is forced. After the int field was changed to a bit in
+the root's state (flags field), we forgot to update the memory barrier
+in create_pending_snapshot() to smp_mb__after_atomic(), but we did the
+change in commit_fs_roots() after clearing BTRFS_ROOT_FORCE_COW. That
+happened in commit 27cdeb7096b8 ("Btrfs: use bitfield instead of integer
+data type for the some variants in btrfs_root"). On the reader side, in
+should_cow_block(), we also use the counterpart smp_mb__before_atomic()
+which generates further confusion.
+
+So change the smp_wmb() to smp_mb__after_atomic(). In fact we don't
+even need any barrier at all since create_pending_snapshot() is called
+in the critical section of a transaction commit and therefore no one
+can concurrently join/attach the transaction, or start a new one, until
+the transaction is unblocked. By the time someone starts a new transaction
+and enters should_cow_block(), a lot of implicit memory barriers already
+took place by having acquired several locks such as fs_info->trans_lock
+and extent buffer locks on the root node at least. Nevertheless, for
+consistency use smp_mb__after_atomic() after setting the force cow bit
+in create_pending_snapshot().
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/transaction.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index f68cfcc1f8300..d558f354b8b82 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -1660,7 +1660,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+ }
+ /* see comments in should_cow_block() */
+ set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
+- smp_wmb();
++ smp_mb__after_atomic();
+
+ btrfs_set_root_node(new_root_item, tmp);
+ /* record when the snapshot was created in key.offset */
+--
+2.51.0
+
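For readers unfamiliar with the barrier helpers discussed in the patch
above, here is a minimal kernel-style sketch of the writer/reader pairing
the message describes. set_bit(), test_bit(), smp_mb__after_atomic() and
smp_mb__before_atomic() are the real kernel APIs; the two helper
functions, their names and the generic flag word are illustrative
stand-ins for create_pending_snapshot() and should_cow_block(), not the
btrfs code itself.

#include <linux/atomic.h>	/* smp_mb__before_atomic(), smp_mb__after_atomic() */
#include <linux/bitops.h>	/* set_bit(), test_bit() */
#include <linux/types.h>

/* Writer: publish a flag bit, as create_pending_snapshot() does after
 * this patch.  The barrier follows a non-value-returning atomic, which
 * is what smp_mb__after_atomic() is meant for, replacing smp_wmb(). */
static void publish_flag(unsigned long *state, int bit)
{
	set_bit(bit, state);
	smp_mb__after_atomic();
}

/* Reader: mirrors the existing smp_mb__before_atomic() + test_bit()
 * pattern in should_cow_block() that the message refers to.  As the
 * message notes, the transaction commit critical section already implies
 * ordering here; the barriers are kept for consistency. */
static bool observe_flag(unsigned long *state, int bit)
{
	smp_mb__before_atomic();
	return test_bit(bit, state);
}
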
net-sched-sch_qfq-fix-null-deref-in-agg_dequeue.patch
+x86-bugs-fix-reporting-of-lfence-retpoline.patch
+btrfs-always-drop-log-root-tree-reference-in-btrfs_r.patch
+btrfs-use-smp_mb__after_atomic-when-forcing-cow-in-c.patch
--- /dev/null
+From b65ea0af0079f32b432165b33550d4e95b18c8ed Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Sep 2025 08:47:05 -0500
+Subject: x86/bugs: Fix reporting of LFENCE retpoline
+
+From: David Kaplan <david.kaplan@amd.com>
+
+[ Upstream commit d1cc1baef67ac6c09b74629ca053bf3fb812f7dc ]
+
+The LFENCE retpoline mitigation is not secure but the kernel prints
+inconsistent messages about this fact. The dmesg log says 'Mitigation:
+LFENCE', implying the system is mitigated. But sysfs reports 'Vulnerable:
+LFENCE', implying the system (correctly) is not mitigated.
+
+Fix this by printing a consistent 'Vulnerable: LFENCE' string everywhere
+when this mitigation is selected.
+
+Signed-off-by: David Kaplan <david.kaplan@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/20250915134706.3201818-1-david.kaplan@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 8794e3f4974b3..57ba697e29180 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1508,7 +1508,7 @@ spectre_v2_user_select_mitigation(void)
+ static const char * const spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+ [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines",
+- [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE",
++ [SPECTRE_V2_LFENCE] = "Vulnerable: LFENCE",
+ [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced / Automatic IBRS",
+ [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced / Automatic IBRS + LFENCE",
+ [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced / Automatic IBRS + Retpolines",
+@@ -3011,9 +3011,6 @@ static char *pbrsb_eibrs_state(void)
+
+ static ssize_t spectre_v2_show_state(char *buf)
+ {
+- if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+- return sysfs_emit(buf, "Vulnerable: LFENCE\n");
+-
+ if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+ return sysfs_emit(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
+
+--
+2.51.0
+
--- /dev/null
+From 8b977547797ba2015b11f98dbf944df250edf3c0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Aug 2025 12:10:28 +0100
+Subject: btrfs: always drop log root tree reference in btrfs_replay_log()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 2f5b8095ea47b142c56c09755a8b1e14145a2d30 ]
+
+Currently we have this odd behaviour:
+
+1) At btrfs_replay_log() we drop the reference of the log root tree if
+ the call to btrfs_recover_log_trees() failed;
+
+2) But if the call to btrfs_recover_log_trees() did not fail, we don't
+ drop the reference in btrfs_replay_log() - we expect that
+ btrfs_recover_log_trees() does it in case it returns success.
+
+Let's simplify this and make btrfs_replay_log() always drop the reference
+on the log root tree. Not only does this simplify the code, it is also
+what makes sense, since it's btrfs_replay_log() that grabbed the reference
+in the first place.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/disk-io.c | 2 +-
+ fs/btrfs/tree-log.c | 1 -
+ 2 files changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 9c2d6f96f46da..136902f27e441 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -2387,10 +2387,10 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
+ }
+ /* returns with log_tree_root freed on success */
+ ret = btrfs_recover_log_trees(log_tree_root);
++ btrfs_put_root(log_tree_root);
+ if (ret) {
+ btrfs_handle_fs_error(fs_info, ret,
+ "Failed to recover log tree");
+- btrfs_put_root(log_tree_root);
+ return ret;
+ }
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 34fedac4e1864..445c7a5641b62 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -6513,7 +6513,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
+
+ log_root_tree->log_root = NULL;
+ clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
+- btrfs_put_root(log_root_tree);
+
+ return 0;
+ error:
+--
+2.51.0
+
--- /dev/null
+From f476a1c6de77130f5290f607f2fd2094dd813876 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Sep 2025 17:01:44 +0200
+Subject: btrfs: scrub: replace max_t()/min_t() with clamp() in
+ scrub_throttle_dev_io()
+
+From: Thorsten Blum <thorsten.blum@linux.dev>
+
+[ Upstream commit a7f3dfb8293c4cee99743132d69863a92e8f4875 ]
+
+Replace max_t() followed by min_t() with a single clamp().
+
+As was pointed out by David Laight in
+https://lore.kernel.org/linux-btrfs/20250906122458.75dfc8f0@pumpkin/
+the calculation may overflow u32 when the input value is too large, so
+clamp_t() is not used. In practice the expected values are in the range
+of megabytes to gigabytes (throughput limit), so the bug would not happen.
+
+Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ Use clamp() and add explanation. ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/scrub.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
+index 6ffd34d39e992..aac4ee5880952 100644
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -2017,8 +2017,7 @@ static void scrub_throttle(struct scrub_ctx *sctx)
+ * Slice is divided into intervals when the IO is submitted, adjust by
+ * bwlimit and maximum of 64 intervals.
+ */
+- div = max_t(u32, 1, (u32)(bwlimit / (16 * 1024 * 1024)));
+- div = min_t(u32, 64, div);
++ div = clamp(bwlimit / (16 * 1024 * 1024), 1, 64);
+
+ /* Start new epoch, set deadline */
+ now = ktime_get();
+--
+2.51.0
+
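To make the overflow argument in the patch above concrete, here is a
small, self-contained userspace illustration (not kernel code) of why
casting to u32 before clamping, as clamp_t(u32, ...) would, can truncate
a huge bwlimit-derived value, while clamping in the value's own 64-bit
type does not. The toy macro and helper are assumptions for the demo
only; they are not the kernel's clamp()/clamp_t() implementations.

#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for clamp_t(u32, v, lo, hi): truncate to 32 bits first. */
#define CLAMP_T_U32(v, lo, hi) \
	((uint32_t)(v) < (lo) ? (lo) : ((uint32_t)(v) > (hi) ? (hi) : (uint32_t)(v)))

/* Toy stand-in for clamp(): compare in the value's own (64-bit) type. */
static uint64_t clamp_u64(uint64_t v, uint64_t lo, uint64_t hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
	/* Absurdly large limit, chosen so bwlimit / 16MiB exceeds U32_MAX. */
	uint64_t bwlimit = (uint64_t)16 * 1024 * 1024 * 0x100000001ULL;
	uint64_t div = bwlimit / (16 * 1024 * 1024);	/* 0x100000001 */

	printf("truncate-then-clamp: %u\n", CLAMP_T_U32(div, 1, 64));	/* 1  */
	printf("clamp in u64:        %llu\n",
	       (unsigned long long)clamp_u64(div, 1, 64));		/* 64 */
	return 0;
}
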
--- /dev/null
+From 9bd34bcc56053310ef3ea6b6c0255bd75c8227be Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Sep 2025 12:09:14 +0100
+Subject: btrfs: use smp_mb__after_atomic() when forcing COW in
+ create_pending_snapshot()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 45c222468d33202c07c41c113301a4b9c8451b8f ]
+
+After setting the BTRFS_ROOT_FORCE_COW flag on the root we do a full
+write barrier, smp_wmb(), but we don't need to; all we need is
+smp_mb__after_atomic(). The use of smp_wmb() is from the old days when
+we didn't use a bit but instead an int field in the root to signal
+whether COW is forced. After the int field was changed to a bit in
+the root's state (flags field), we forgot to update the memory barrier
+in create_pending_snapshot() to smp_mb__after_atomic(), but we did the
+change in commit_fs_roots() after clearing BTRFS_ROOT_FORCE_COW. That
+happened in commit 27cdeb7096b8 ("Btrfs: use bitfield instead of integer
+data type for the some variants in btrfs_root"). On the reader side, in
+should_cow_block(), we also use the counterpart smp_mb__before_atomic()
+which generates further confusion.
+
+So change the smp_wmb() to smp_mb__after_atomic(). In fact we don't
+even need any barrier at all since create_pending_snapshot() is called
+in the critical section of a transaction commit and therefore no one
+can concurrently join/attach the transaction, or start a new one, until
+the transaction is unblocked. By the time someone starts a new transaction
+and enters should_cow_block(), a lot of implicit memory barriers already
+took place by having acquired several locks such as fs_info->trans_lock
+and extent buffer locks on the root node at least. Nevertheless, for
+consistency use smp_mb__after_atomic() after setting the force cow bit
+in create_pending_snapshot().
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/transaction.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index 4fb5e12c87d1b..d96221ed835e9 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -1765,7 +1765,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+ }
+ /* see comments in should_cow_block() */
+ set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
+- smp_wmb();
++ smp_mb__after_atomic();
+
+ btrfs_set_root_node(new_root_item, tmp);
+ /* record when the snapshot was created in key.offset */
+--
+2.51.0
+
net-sched-sch_qfq-fix-null-deref-in-agg_dequeue.patch
+x86-bugs-fix-reporting-of-lfence-retpoline.patch
+btrfs-scrub-replace-max_t-min_t-with-clamp-in-scrub_.patch
+btrfs-always-drop-log-root-tree-reference-in-btrfs_r.patch
+btrfs-use-smp_mb__after_atomic-when-forcing-cow-in-c.patch
--- /dev/null
+From 28fcb9a170c8be4e2920ecb17a4b5f15e5681b81 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Sep 2025 08:47:05 -0500
+Subject: x86/bugs: Fix reporting of LFENCE retpoline
+
+From: David Kaplan <david.kaplan@amd.com>
+
+[ Upstream commit d1cc1baef67ac6c09b74629ca053bf3fb812f7dc ]
+
+The LFENCE retpoline mitigation is not secure but the kernel prints
+inconsistent messages about this fact. The dmesg log says 'Mitigation:
+LFENCE', implying the system is mitigated. But sysfs reports 'Vulnerable:
+LFENCE', implying the system (correctly) is not mitigated.
+
+Fix this by printing a consistent 'Vulnerable: LFENCE' string everywhere
+when this mitigation is selected.
+
+Signed-off-by: David Kaplan <david.kaplan@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/20250915134706.3201818-1-david.kaplan@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 1628c00145892..8df48691f4910 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1539,7 +1539,7 @@ spectre_v2_user_select_mitigation(void)
+ static const char * const spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+ [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines",
+- [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE",
++ [SPECTRE_V2_LFENCE] = "Vulnerable: LFENCE",
+ [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced / Automatic IBRS",
+ [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced / Automatic IBRS + LFENCE",
+ [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced / Automatic IBRS + Retpolines",
+@@ -3168,9 +3168,6 @@ static const char *spectre_bhi_state(void)
+
+ static ssize_t spectre_v2_show_state(char *buf)
+ {
+- if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+- return sysfs_emit(buf, "Vulnerable: LFENCE\n");
+-
+ if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+ return sysfs_emit(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
+
+--
+2.51.0
+
--- /dev/null
+From 2524d3603db07d1acaf0af4f49c597e74dcd7b07 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Sep 2025 12:09:14 +0100
+Subject: btrfs: use smp_mb__after_atomic() when forcing COW in
+ create_pending_snapshot()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 45c222468d33202c07c41c113301a4b9c8451b8f ]
+
+After setting the BTRFS_ROOT_FORCE_COW flag on the root we do a full
+write barrier, smp_wmb(), but we don't need to; all we need is
+smp_mb__after_atomic(). The use of smp_wmb() is from the old days when
+we didn't use a bit but instead an int field in the root to signal
+whether COW is forced. After the int field was changed to a bit in
+the root's state (flags field), we forgot to update the memory barrier
+in create_pending_snapshot() to smp_mb__after_atomic(), but we did the
+change in commit_fs_roots() after clearing BTRFS_ROOT_FORCE_COW. That
+happened in commit 27cdeb7096b8 ("Btrfs: use bitfield instead of integer
+data type for the some variants in btrfs_root"). On the reader side, in
+should_cow_block(), we also use the counterpart smp_mb__before_atomic()
+which generates further confusion.
+
+So change the smp_wmb() to smp_mb__after_atomic(). In fact we don't
+even need any barrier at all since create_pending_snapshot() is called
+in the critical section of a transaction commit and therefore no one
+can concurrently join/attach the transaction, or start a new one, until
+the transaction is unblocked. By the time someone starts a new transaction
+and enters should_cow_block(), a lot of implicit memory barriers already
+took place by having acquired several locks such as fs_info->trans_lock
+and extent buffer locks on the root node at least. Nevertheless, for
+consistency use smp_mb__after_atomic() after setting the force cow bit
+in create_pending_snapshot().
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/transaction.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index 094b024bbf0cf..6618b42defed7 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -1546,7 +1546,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+ }
+ /* see comments in should_cow_block() */
+ set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
+- smp_wmb();
++ smp_mb__after_atomic();
+
+ btrfs_set_root_node(new_root_item, tmp);
+ /* record when the snapshot was created in key.offset */
+--
+2.51.0
+
net-sched-sch_qfq-fix-null-deref-in-agg_dequeue.patch
+x86-bugs-fix-reporting-of-lfence-retpoline.patch
+btrfs-use-smp_mb__after_atomic-when-forcing-cow-in-c.patch
--- /dev/null
+From e39da45843868dd70b02049cb59d491ab47105f3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Sep 2025 08:47:05 -0500
+Subject: x86/bugs: Fix reporting of LFENCE retpoline
+
+From: David Kaplan <david.kaplan@amd.com>
+
+[ Upstream commit d1cc1baef67ac6c09b74629ca053bf3fb812f7dc ]
+
+The LFENCE retpoline mitigation is not secure but the kernel prints
+inconsistent messages about this fact. The dmesg log says 'Mitigation:
+LFENCE', implying the system is mitigated. But sysfs reports 'Vulnerable:
+LFENCE', implying the system (correctly) is not mitigated.
+
+Fix this by printing a consistent 'Vulnerable: LFENCE' string everywhere
+when this mitigation is selected.
+
+Signed-off-by: David Kaplan <david.kaplan@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/20250915134706.3201818-1-david.kaplan@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 4f803aed2ef0e..b10e257799c16 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1188,7 +1188,7 @@ spectre_v2_user_select_mitigation(void)
+ static const char * const spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+ [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines",
+- [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE",
++ [SPECTRE_V2_LFENCE] = "Vulnerable: LFENCE",
+ [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced / Automatic IBRS",
+ [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced / Automatic IBRS + LFENCE",
+ [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced / Automatic IBRS + Retpolines",
+@@ -2280,9 +2280,6 @@ static char *pbrsb_eibrs_state(void)
+
+ static ssize_t spectre_v2_show_state(char *buf)
+ {
+- if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+- return sysfs_emit(buf, "Vulnerable: LFENCE\n");
+-
+ if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+ return sysfs_emit(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
+
+--
+2.51.0
+
--- /dev/null
+From ffbe1930ee87e820eb9bd3809625807a5535f61c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Sep 2025 14:09:13 +0800
+Subject: arch: Add the macro COMPILE_OFFSETS to all the asm-offsets.c
+
+From: Menglong Dong <menglong8.dong@gmail.com>
+
+[ Upstream commit 35561bab768977c9e05f1f1a9bc00134c85f3e28 ]
+
+The include/generated/asm-offsets.h header is generated by Kbuild during
+compilation from arch/SRCARCH/kernel/asm-offsets.c. When we want to
+generate another, similar offsets header file, a circular dependency can
+happen.
+
+For example, say we want to generate an offsets file
+include/generated/test.h, which is included in include/sched/sched.h. If
+we generate asm-offsets.h first, it will fail, as include/sched/sched.h
+is included in asm-offsets.c and include/generated/test.h doesn't exist
+yet; if we generate test.h first, it can't succeed either, as
+include/generated/asm-offsets.h is included by it.
+
+On x86_64, the macro COMPILE_OFFSETS is used to avoid such a circular
+dependency: we can generate asm-offsets.h first, and if COMPILE_OFFSETS
+is defined, we don't include "generated/test.h".
+
+So define the macro COMPILE_OFFSETS in all the asm-offsets.c files for
+this purpose.
+
+Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/alpha/kernel/asm-offsets.c | 1 +
+ arch/arc/kernel/asm-offsets.c | 1 +
+ arch/arm/kernel/asm-offsets.c | 2 ++
+ arch/arm64/kernel/asm-offsets.c | 1 +
+ arch/csky/kernel/asm-offsets.c | 1 +
+ arch/hexagon/kernel/asm-offsets.c | 1 +
+ arch/loongarch/kernel/asm-offsets.c | 2 ++
+ arch/m68k/kernel/asm-offsets.c | 1 +
+ arch/microblaze/kernel/asm-offsets.c | 1 +
+ arch/mips/kernel/asm-offsets.c | 2 ++
+ arch/nios2/kernel/asm-offsets.c | 1 +
+ arch/openrisc/kernel/asm-offsets.c | 1 +
+ arch/parisc/kernel/asm-offsets.c | 1 +
+ arch/powerpc/kernel/asm-offsets.c | 1 +
+ arch/riscv/kernel/asm-offsets.c | 1 +
+ arch/s390/kernel/asm-offsets.c | 1 +
+ arch/sh/kernel/asm-offsets.c | 1 +
+ arch/sparc/kernel/asm-offsets.c | 1 +
+ arch/um/kernel/asm-offsets.c | 2 ++
+ arch/xtensa/kernel/asm-offsets.c | 1 +
+ 20 files changed, 24 insertions(+)
+
+diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c
+index 05d9296af5ea6..a251f1bc74acf 100644
+--- a/arch/alpha/kernel/asm-offsets.c
++++ b/arch/alpha/kernel/asm-offsets.c
+@@ -4,6 +4,7 @@
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/types.h>
+ #include <linux/stddef.h>
+diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
+index 0e884036ab743..897dcfc7c9fa0 100644
+--- a/arch/arc/kernel/asm-offsets.c
++++ b/arch/arc/kernel/asm-offsets.c
+@@ -2,6 +2,7 @@
+ /*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/sched.h>
+ #include <linux/mm.h>
+diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
+index 2c8d76fd7c662..820bc05685bab 100644
+--- a/arch/arm/kernel/asm-offsets.c
++++ b/arch/arm/kernel/asm-offsets.c
+@@ -7,6 +7,8 @@
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
++#define COMPILE_OFFSETS
++
+ #include <linux/compiler.h>
+ #include <linux/sched.h>
+ #include <linux/mm.h>
+diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
+index 1197e7679882e..4785e8947f520 100644
+--- a/arch/arm64/kernel/asm-offsets.c
++++ b/arch/arm64/kernel/asm-offsets.c
+@@ -6,6 +6,7 @@
+ * 2001-2002 Keith Owens
+ * Copyright (C) 2012 ARM Ltd.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/arm_sdei.h>
+ #include <linux/sched.h>
+diff --git a/arch/csky/kernel/asm-offsets.c b/arch/csky/kernel/asm-offsets.c
+index d1e9035794733..5525c8e7e1d9e 100644
+--- a/arch/csky/kernel/asm-offsets.c
++++ b/arch/csky/kernel/asm-offsets.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0
+ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
++#define COMPILE_OFFSETS
+
+ #include <linux/sched.h>
+ #include <linux/kernel_stat.h>
+diff --git a/arch/hexagon/kernel/asm-offsets.c b/arch/hexagon/kernel/asm-offsets.c
+index 03a7063f94561..50eea9fa6f137 100644
+--- a/arch/hexagon/kernel/asm-offsets.c
++++ b/arch/hexagon/kernel/asm-offsets.c
+@@ -8,6 +8,7 @@
+ *
+ * Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/compat.h>
+ #include <linux/types.h>
+diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c
+index bdd88eda9513f..91b3eae9414f7 100644
+--- a/arch/loongarch/kernel/asm-offsets.c
++++ b/arch/loongarch/kernel/asm-offsets.c
+@@ -4,6 +4,8 @@
+ *
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
++#define COMPILE_OFFSETS
++
+ #include <linux/types.h>
+ #include <linux/sched.h>
+ #include <linux/mm.h>
+diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c
+index 906d732305374..67a1990f9d748 100644
+--- a/arch/m68k/kernel/asm-offsets.c
++++ b/arch/m68k/kernel/asm-offsets.c
+@@ -9,6 +9,7 @@
+ * #defines from the assembly-language output.
+ */
+
++#define COMPILE_OFFSETS
+ #define ASM_OFFSETS_C
+
+ #include <linux/stddef.h>
+diff --git a/arch/microblaze/kernel/asm-offsets.c b/arch/microblaze/kernel/asm-offsets.c
+index 104c3ac5f30c8..b4b67d58e7f6a 100644
+--- a/arch/microblaze/kernel/asm-offsets.c
++++ b/arch/microblaze/kernel/asm-offsets.c
+@@ -7,6 +7,7 @@
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/init.h>
+ #include <linux/stddef.h>
+diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
+index 08342b9eccdbd..0f9ed454faf19 100644
+--- a/arch/mips/kernel/asm-offsets.c
++++ b/arch/mips/kernel/asm-offsets.c
+@@ -9,6 +9,8 @@
+ * Kevin Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
+ * Copyright (C) 2000 MIPS Technologies, Inc.
+ */
++#define COMPILE_OFFSETS
++
+ #include <linux/compat.h>
+ #include <linux/types.h>
+ #include <linux/sched.h>
+diff --git a/arch/nios2/kernel/asm-offsets.c b/arch/nios2/kernel/asm-offsets.c
+index e3d9b7b6fb48a..88190b503ce5d 100644
+--- a/arch/nios2/kernel/asm-offsets.c
++++ b/arch/nios2/kernel/asm-offsets.c
+@@ -2,6 +2,7 @@
+ /*
+ * Copyright (C) 2011 Tobias Klauser <tklauser@distanz.ch>
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/stddef.h>
+ #include <linux/sched.h>
+diff --git a/arch/openrisc/kernel/asm-offsets.c b/arch/openrisc/kernel/asm-offsets.c
+index 710651d5aaae1..3cc826f2216b1 100644
+--- a/arch/openrisc/kernel/asm-offsets.c
++++ b/arch/openrisc/kernel/asm-offsets.c
+@@ -18,6 +18,7 @@
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/signal.h>
+ #include <linux/sched.h>
+diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
+index 94652e13c2603..21e900c0aa958 100644
+--- a/arch/parisc/kernel/asm-offsets.c
++++ b/arch/parisc/kernel/asm-offsets.c
+@@ -13,6 +13,7 @@
+ * Copyright (C) 2002 Randolph Chung <tausq with parisc-linux.org>
+ * Copyright (C) 2003 James Bottomley <jejb at parisc-linux.org>
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/types.h>
+ #include <linux/sched.h>
+diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
+index 65d79dd0c92ce..5a4edc1e5504f 100644
+--- a/arch/powerpc/kernel/asm-offsets.c
++++ b/arch/powerpc/kernel/asm-offsets.c
+@@ -8,6 +8,7 @@
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/compat.h>
+ #include <linux/signal.h>
+diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
+index 1ecafbcee9a0a..21f034b3fdbeb 100644
+--- a/arch/riscv/kernel/asm-offsets.c
++++ b/arch/riscv/kernel/asm-offsets.c
+@@ -3,6 +3,7 @@
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/kbuild.h>
+ #include <linux/mm.h>
+diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
+index d8ce965c0a97c..9ff68c7f61cc0 100644
+--- a/arch/s390/kernel/asm-offsets.c
++++ b/arch/s390/kernel/asm-offsets.c
+@@ -4,6 +4,7 @@
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
++#define COMPILE_OFFSETS
+
+ #define ASM_OFFSETS_C
+
+diff --git a/arch/sh/kernel/asm-offsets.c b/arch/sh/kernel/asm-offsets.c
+index a0322e8328456..429b6a7631468 100644
+--- a/arch/sh/kernel/asm-offsets.c
++++ b/arch/sh/kernel/asm-offsets.c
+@@ -8,6 +8,7 @@
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/stddef.h>
+ #include <linux/types.h>
+diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c
+index 5784f2df489a4..f1e27a7f800f4 100644
+--- a/arch/sparc/kernel/asm-offsets.c
++++ b/arch/sparc/kernel/asm-offsets.c
+@@ -10,6 +10,7 @@
+ *
+ * On sparc, thread_info data is static and TI_XXX offsets are computed by hand.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/sched.h>
+ #include <linux/mm_types.h>
+diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c
+index 1fb12235ab9c8..a69873aa697f4 100644
+--- a/arch/um/kernel/asm-offsets.c
++++ b/arch/um/kernel/asm-offsets.c
+@@ -1 +1,3 @@
++#define COMPILE_OFFSETS
++
+ #include <sysdep/kernel-offsets.h>
+diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
+index da38de20ae598..cfbced95e944a 100644
+--- a/arch/xtensa/kernel/asm-offsets.c
++++ b/arch/xtensa/kernel/asm-offsets.c
+@@ -11,6 +11,7 @@
+ *
+ * Chris Zankel <chris@zankel.net>
+ */
++#define COMPILE_OFFSETS
+
+ #include <asm/processor.h>
+ #include <asm/coprocessor.h>
+--
+2.51.0
+
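To illustrate the guard pattern the patch above enables (following the
existing x86_64 approach), here is a minimal sketch of how a header can
consume a generated offsets file without creating the circular
dependency: the include is skipped while asm-offsets.c itself is being
compiled. The header and file names (sched.h, generated/test.h) are the
hypothetical example from the commit message, not real kernel files.

/* sched.h (hypothetical consumer, per the commit message's example) */
#ifndef COMPILE_OFFSETS
/*
 * Only pulled in for normal kernel objects.  When asm-offsets.c is being
 * compiled (and now defines COMPILE_OFFSETS first), this include is
 * skipped, so generated/test.h does not need to exist yet.
 */
#include <generated/test.h>
#endif
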
--- /dev/null
+From 1a4b6afa3244441ca0aea69a7dde7f080c6686da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Aug 2025 12:10:28 +0100
+Subject: btrfs: always drop log root tree reference in btrfs_replay_log()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 2f5b8095ea47b142c56c09755a8b1e14145a2d30 ]
+
+Currently we have this odd behaviour:
+
+1) At btrfs_replay_log() we drop the reference of the log root tree if
+ the call to btrfs_recover_log_trees() failed;
+
+2) But if the call to btrfs_recover_log_trees() did not fail, we don't
+ drop the reference in btrfs_replay_log() - we expect that
+ btrfs_recover_log_trees() does it in case it returns success.
+
+Let's simplify this and make btrfs_replay_log() always drop the reference
+on the log root tree. Not only does this simplify the code, it is also
+what makes sense, since it's btrfs_replay_log() that grabbed the reference
+in the first place.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/disk-io.c | 2 +-
+ fs/btrfs/tree-log.c | 1 -
+ 2 files changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 76a261cbf39d6..8576ba4aa0b7d 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -2413,10 +2413,10 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
+
+ /* returns with log_tree_root freed on success */
+ ret = btrfs_recover_log_trees(log_tree_root);
++ btrfs_put_root(log_tree_root);
+ if (ret) {
+ btrfs_handle_fs_error(fs_info, ret,
+ "Failed to recover log tree");
+- btrfs_put_root(log_tree_root);
+ return ret;
+ }
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index e4cc287eee993..fdcf66ba318ad 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -7366,7 +7366,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
+
+ log_root_tree->log_root = NULL;
+ clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
+- btrfs_put_root(log_root_tree);
+
+ return 0;
+ error:
+--
+2.51.0
+
--- /dev/null
+From 6afe9f968816990cae616be5a5ce679304c90cec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Sep 2025 17:01:44 +0200
+Subject: btrfs: scrub: replace max_t()/min_t() with clamp() in
+ scrub_throttle_dev_io()
+
+From: Thorsten Blum <thorsten.blum@linux.dev>
+
+[ Upstream commit a7f3dfb8293c4cee99743132d69863a92e8f4875 ]
+
+Replace max_t() followed by min_t() with a single clamp().
+
+As was pointed out by David Laight in
+https://lore.kernel.org/linux-btrfs/20250906122458.75dfc8f0@pumpkin/
+the calculation may overflow u32 when the input value is too large, so
+clamp_t() is not used. In practice the expected values are in the range
+of megabytes to gigabytes (throughput limit), so the bug would not happen.
+
+Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ Use clamp() and add explanation. ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/scrub.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
+index f48895a9b165e..ce8a9c226534f 100644
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -2191,8 +2191,7 @@ static void scrub_throttle(struct scrub_ctx *sctx)
+ * Slice is divided into intervals when the IO is submitted, adjust by
+ * bwlimit and maximum of 64 intervals.
+ */
+- div = max_t(u32, 1, (u32)(bwlimit / (16 * 1024 * 1024)));
+- div = min_t(u32, 64, div);
++ div = clamp(bwlimit / (16 * 1024 * 1024), 1, 64);
+
+ /* Start new epoch, set deadline */
+ now = ktime_get();
+--
+2.51.0
+
--- /dev/null
+From db12f5ea1c6e8ad6962fd524fdc61c9a6f3158ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Sep 2025 12:09:14 +0100
+Subject: btrfs: use smp_mb__after_atomic() when forcing COW in
+ create_pending_snapshot()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 45c222468d33202c07c41c113301a4b9c8451b8f ]
+
+After setting the BTRFS_ROOT_FORCE_COW flag on the root we do a full
+write barrier, smp_wmb(), but we don't need to; all we need is
+smp_mb__after_atomic(). The use of smp_wmb() is from the old days when
+we didn't use a bit but instead an int field in the root to signal
+whether COW is forced. After the int field was changed to a bit in
+the root's state (flags field), we forgot to update the memory barrier
+in create_pending_snapshot() to smp_mb__after_atomic(), but we did the
+change in commit_fs_roots() after clearing BTRFS_ROOT_FORCE_COW. That
+happened in commit 27cdeb7096b8 ("Btrfs: use bitfield instead of integer
+data type for the some variants in btrfs_root"). On the reader side, in
+should_cow_block(), we also use the counterpart smp_mb__before_atomic()
+which generates further confusion.
+
+So change the smp_wmb() to smp_mb__after_atomic(). In fact we don't
+even need any barrier at all since create_pending_snapshot() is called
+in the critical section of a transaction commit and therefore no one
+can concurrently join/attach the transaction, or start a new one, until
+the transaction is unblocked. By the time someone starts a new transaction
+and enters should_cow_block(), a lot of implicit memory barriers already
+took place by having acquired several locks such as fs_info->trans_lock
+and extent buffer locks on the root node at least. Nevertheless, for
+consistency use smp_mb__after_atomic() after setting the force cow bit
+in create_pending_snapshot().
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/transaction.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index ff3e0d4cf4b48..54894a950c6f7 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -1787,7 +1787,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+ }
+ /* see comments in should_cow_block() */
+ set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
+- smp_wmb();
++ smp_mb__after_atomic();
+
+ btrfs_set_root_node(new_root_item, tmp);
+ /* record when the snapshot was created in key.offset */
+--
+2.51.0
+
--- /dev/null
+From 138ae4c24306cd63476f082697a728b825b23160 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jul 2025 11:13:15 +0900
+Subject: btrfs: zoned: refine extent allocator hint selection
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 0d703963d297964451783e1a0688ebdf74cd6151 ]
+
+The hint block group selection in the extent allocator is wrong in the
+first place, as it can select the dedicated data relocation block group for
+the normal data allocation.
+
+Since we separated the normal data space_info and the data relocation
+space_info, we can easily identify whether a block group is for data
+relocation or not. Do not choose it for the normal data allocation.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent-tree.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index 5395e27f9e89a..7985ca56f6b70 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -4224,7 +4224,8 @@ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
+ }
+
+ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
+- struct find_free_extent_ctl *ffe_ctl)
++ struct find_free_extent_ctl *ffe_ctl,
++ struct btrfs_space_info *space_info)
+ {
+ if (ffe_ctl->for_treelog) {
+ spin_lock(&fs_info->treelog_bg_lock);
+@@ -4248,6 +4249,7 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
+ u64 avail = block_group->zone_capacity - block_group->alloc_offset;
+
+ if (block_group_bits(block_group, ffe_ctl->flags) &&
++ block_group->space_info == space_info &&
+ avail >= ffe_ctl->num_bytes) {
+ ffe_ctl->hint_byte = block_group->start;
+ break;
+@@ -4269,7 +4271,7 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
+ return prepare_allocation_clustered(fs_info, ffe_ctl,
+ space_info, ins);
+ case BTRFS_EXTENT_ALLOC_ZONED:
+- return prepare_allocation_zoned(fs_info, ffe_ctl);
++ return prepare_allocation_zoned(fs_info, ffe_ctl, space_info);
+ default:
+ BUG();
+ }
+--
+2.51.0
+
--- /dev/null
+From 0eb6e482475c0c284a317ac5c506a4b8996c084e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Sep 2025 20:30:17 +0000
+Subject: EDAC/mc_sysfs: Increase legacy channel support to 16
+
+From: Avadhut Naik <avadhut.naik@amd.com>
+
+[ Upstream commit 6e1c2c6c2c40ce99e0d2633b212f43c702c1a002 ]
+
+Newer AMD systems can support up to 16 channels per EDAC "mc" device.
+These are detected by the EDAC module running on the device, and the
+current EDAC interface is appropriately enumerated.
+
+The legacy EDAC sysfs interface, however, provides device attributes for
+channels 0 through 11 only. Consequently, the last four channels, 12
+through 15, will not be enumerated and will not be visible through the
+legacy sysfs interface.
+
+Add additional device attributes to ensure that all 16 channels, if
+present, are enumerated by and visible through the legacy EDAC sysfs
+interface.
+
+Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/20250916203242.1281036-1-avadhut.naik@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/edac/edac_mc_sysfs.c | 24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
+index 15f63452a9bec..b01436d9ddaed 100644
+--- a/drivers/edac/edac_mc_sysfs.c
++++ b/drivers/edac/edac_mc_sysfs.c
+@@ -306,6 +306,14 @@ DEVICE_CHANNEL(ch10_dimm_label, S_IRUGO | S_IWUSR,
+ channel_dimm_label_show, channel_dimm_label_store, 10);
+ DEVICE_CHANNEL(ch11_dimm_label, S_IRUGO | S_IWUSR,
+ channel_dimm_label_show, channel_dimm_label_store, 11);
++DEVICE_CHANNEL(ch12_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 12);
++DEVICE_CHANNEL(ch13_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 13);
++DEVICE_CHANNEL(ch14_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 14);
++DEVICE_CHANNEL(ch15_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 15);
+
+ /* Total possible dynamic DIMM Label attribute file table */
+ static struct attribute *dynamic_csrow_dimm_attr[] = {
+@@ -321,6 +329,10 @@ static struct attribute *dynamic_csrow_dimm_attr[] = {
+ &dev_attr_legacy_ch9_dimm_label.attr.attr,
+ &dev_attr_legacy_ch10_dimm_label.attr.attr,
+ &dev_attr_legacy_ch11_dimm_label.attr.attr,
++ &dev_attr_legacy_ch12_dimm_label.attr.attr,
++ &dev_attr_legacy_ch13_dimm_label.attr.attr,
++ &dev_attr_legacy_ch14_dimm_label.attr.attr,
++ &dev_attr_legacy_ch15_dimm_label.attr.attr,
+ NULL
+ };
+
+@@ -349,6 +361,14 @@ DEVICE_CHANNEL(ch10_ce_count, S_IRUGO,
+ channel_ce_count_show, NULL, 10);
+ DEVICE_CHANNEL(ch11_ce_count, S_IRUGO,
+ channel_ce_count_show, NULL, 11);
++DEVICE_CHANNEL(ch12_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 12);
++DEVICE_CHANNEL(ch13_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 13);
++DEVICE_CHANNEL(ch14_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 14);
++DEVICE_CHANNEL(ch15_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 15);
+
+ /* Total possible dynamic ce_count attribute file table */
+ static struct attribute *dynamic_csrow_ce_count_attr[] = {
+@@ -364,6 +384,10 @@ static struct attribute *dynamic_csrow_ce_count_attr[] = {
+ &dev_attr_legacy_ch9_ce_count.attr.attr,
+ &dev_attr_legacy_ch10_ce_count.attr.attr,
+ &dev_attr_legacy_ch11_ce_count.attr.attr,
++ &dev_attr_legacy_ch12_ce_count.attr.attr,
++ &dev_attr_legacy_ch13_ce_count.attr.attr,
++ &dev_attr_legacy_ch14_ce_count.attr.attr,
++ &dev_attr_legacy_ch15_ce_count.attr.attr,
+ NULL
+ };
+
+--
+2.51.0
+
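As a quick way to see what the patch above exposes, the following
userspace snippet reads one of the newly enumerated legacy attributes.
The path layout is the long-standing legacy EDAC csrow interface, but the
specific mc/csrow/channel numbers are assumptions about a system that
actually reports 16 channels; adjust them for real hardware.

#include <stdio.h>

int main(void)
{
	/* Channel 12's corrected-error count via the legacy csrow interface;
	 * only present on hardware that reports that many channels. */
	const char *path =
		"/sys/devices/system/edac/mc/mc0/csrow0/ch12_ce_count";
	char buf[64];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("ch12 CE count: %s", buf);
	fclose(f);
	return 0;
}
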
--- /dev/null
+From db01bc1334e68bb784336c7c6a17f5330fe8bd7a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Aug 2025 14:03:40 -0400
+Subject: perf: Have get_perf_callchain() return NULL if crosstask and user are
+ set
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+[ Upstream commit 153f9e74dec230f2e070e16fa061bc7adfd2c450 ]
+
+get_perf_callchain() doesn't support cross-task unwinding for user space
+stacks, so have it return NULL if both the crosstask and user arguments
+are set.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20250820180428.426423415@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/callchain.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
+index 1273be84392cf..ce5534c97cd1d 100644
+--- a/kernel/events/callchain.c
++++ b/kernel/events/callchain.c
+@@ -184,6 +184,10 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+ struct perf_callchain_entry_ctx ctx;
+ int rctx;
+
++ /* crosstask is not supported for user stacks */
++ if (crosstask && user && !kernel)
++ return NULL;
++
+ entry = get_callchain_entry(&rctx);
+ if (!entry)
+ return NULL;
+@@ -200,7 +204,7 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+ perf_callchain_kernel(&ctx, regs);
+ }
+
+- if (user) {
++ if (user && !crosstask) {
+ if (!user_mode(regs)) {
+ if (current->mm)
+ regs = task_pt_regs(current);
+@@ -209,9 +213,6 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+ }
+
+ if (regs) {
+- if (crosstask)
+- goto exit_put;
+-
+ if (add_mark)
+ perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
+
+@@ -219,7 +220,6 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+ }
+ }
+
+-exit_put:
+ put_callchain_entry(rctx);
+
+ return entry;
+--
+2.51.0
+
net-sched-sch_qfq-fix-null-deref-in-agg_dequeue.patch
+perf-have-get_perf_callchain-return-null-if-crosstas.patch
+x86-bugs-fix-reporting-of-lfence-retpoline.patch
+edac-mc_sysfs-increase-legacy-channel-support-to-16.patch
+btrfs-zoned-refine-extent-allocator-hint-selection.patch
+btrfs-scrub-replace-max_t-min_t-with-clamp-in-scrub_.patch
+btrfs-always-drop-log-root-tree-reference-in-btrfs_r.patch
+btrfs-use-smp_mb__after_atomic-when-forcing-cow-in-c.patch
+arch-add-the-macro-compile_offsets-to-all-the-asm-of.patch
--- /dev/null
+From 83bed2b0363895cb49a0657a2060b85a29fbee20 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Sep 2025 08:47:05 -0500
+Subject: x86/bugs: Fix reporting of LFENCE retpoline
+
+From: David Kaplan <david.kaplan@amd.com>
+
+[ Upstream commit d1cc1baef67ac6c09b74629ca053bf3fb812f7dc ]
+
+The LFENCE retpoline mitigation is not secure but the kernel prints
+inconsistent messages about this fact. The dmesg log says 'Mitigation:
+LFENCE', implying the system is mitigated. But sysfs reports 'Vulnerable:
+LFENCE', implying the system (correctly) is not mitigated.
+
+Fix this by printing a consistent 'Vulnerable: LFENCE' string everywhere
+when this mitigation is selected.
+
+Signed-off-by: David Kaplan <david.kaplan@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/20250915134706.3201818-1-david.kaplan@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index ff8965bce6c90..a0b362ac50a1b 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1539,7 +1539,7 @@ spectre_v2_user_select_mitigation(void)
+ static const char * const spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+ [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines",
+- [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE",
++ [SPECTRE_V2_LFENCE] = "Vulnerable: LFENCE",
+ [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced / Automatic IBRS",
+ [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced / Automatic IBRS + LFENCE",
+ [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced / Automatic IBRS + Retpolines",
+@@ -3169,9 +3169,6 @@ static const char *spectre_bhi_state(void)
+
+ static ssize_t spectre_v2_show_state(char *buf)
+ {
+- if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+- return sysfs_emit(buf, "Vulnerable: LFENCE\n");
+-
+ if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+ return sysfs_emit(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
+
+--
+2.51.0
+
--- /dev/null
+From 8905c9ee0afca1bccc75668f38209611ee29903d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Sep 2025 14:09:13 +0800
+Subject: arch: Add the macro COMPILE_OFFSETS to all the asm-offsets.c
+
+From: Menglong Dong <menglong8.dong@gmail.com>
+
+[ Upstream commit 35561bab768977c9e05f1f1a9bc00134c85f3e28 ]
+
+The include/generated/asm-offsets.h header is generated by Kbuild during
+compilation from arch/SRCARCH/kernel/asm-offsets.c. When we want to
+generate another, similar offsets header file, a circular dependency can
+happen.
+
+For example, say we want to generate an offsets file
+include/generated/test.h, which is included in include/sched/sched.h. If
+we generate asm-offsets.h first, it will fail, as include/sched/sched.h
+is included in asm-offsets.c and include/generated/test.h doesn't exist
+yet; if we generate test.h first, it can't succeed either, as
+include/generated/asm-offsets.h is included by it.
+
+On x86_64, the macro COMPILE_OFFSETS is used to avoid such a circular
+dependency: we can generate asm-offsets.h first, and if COMPILE_OFFSETS
+is defined, we don't include "generated/test.h".
+
+So define the macro COMPILE_OFFSETS in all the asm-offsets.c files for
+this purpose.
+
+Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/alpha/kernel/asm-offsets.c | 1 +
+ arch/arc/kernel/asm-offsets.c | 1 +
+ arch/arm/kernel/asm-offsets.c | 2 ++
+ arch/arm64/kernel/asm-offsets.c | 1 +
+ arch/csky/kernel/asm-offsets.c | 1 +
+ arch/hexagon/kernel/asm-offsets.c | 1 +
+ arch/loongarch/kernel/asm-offsets.c | 2 ++
+ arch/m68k/kernel/asm-offsets.c | 1 +
+ arch/microblaze/kernel/asm-offsets.c | 1 +
+ arch/mips/kernel/asm-offsets.c | 2 ++
+ arch/nios2/kernel/asm-offsets.c | 1 +
+ arch/openrisc/kernel/asm-offsets.c | 1 +
+ arch/parisc/kernel/asm-offsets.c | 1 +
+ arch/powerpc/kernel/asm-offsets.c | 1 +
+ arch/riscv/kernel/asm-offsets.c | 1 +
+ arch/s390/kernel/asm-offsets.c | 1 +
+ arch/sh/kernel/asm-offsets.c | 1 +
+ arch/sparc/kernel/asm-offsets.c | 1 +
+ arch/um/kernel/asm-offsets.c | 2 ++
+ arch/xtensa/kernel/asm-offsets.c | 1 +
+ 20 files changed, 24 insertions(+)
+
+diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c
+index e9dad60b147f3..1ebb058904992 100644
+--- a/arch/alpha/kernel/asm-offsets.c
++++ b/arch/alpha/kernel/asm-offsets.c
+@@ -4,6 +4,7 @@
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/types.h>
+ #include <linux/stddef.h>
+diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
+index f77deb7991757..2978da85fcb65 100644
+--- a/arch/arc/kernel/asm-offsets.c
++++ b/arch/arc/kernel/asm-offsets.c
+@@ -2,6 +2,7 @@
+ /*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/sched.h>
+ #include <linux/mm.h>
+diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
+index 4853875740d0f..d9f129c584b1d 100644
+--- a/arch/arm/kernel/asm-offsets.c
++++ b/arch/arm/kernel/asm-offsets.c
+@@ -7,6 +7,8 @@
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
++#define COMPILE_OFFSETS
++
+ #include <linux/compiler.h>
+ #include <linux/sched.h>
+ #include <linux/mm.h>
+diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
+index b21dd24b8efc3..020e01181a0f1 100644
+--- a/arch/arm64/kernel/asm-offsets.c
++++ b/arch/arm64/kernel/asm-offsets.c
+@@ -6,6 +6,7 @@
+ * 2001-2002 Keith Owens
+ * Copyright (C) 2012 ARM Ltd.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/arm_sdei.h>
+ #include <linux/sched.h>
+diff --git a/arch/csky/kernel/asm-offsets.c b/arch/csky/kernel/asm-offsets.c
+index d1e9035794733..5525c8e7e1d9e 100644
+--- a/arch/csky/kernel/asm-offsets.c
++++ b/arch/csky/kernel/asm-offsets.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0
+ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
++#define COMPILE_OFFSETS
+
+ #include <linux/sched.h>
+ #include <linux/kernel_stat.h>
+diff --git a/arch/hexagon/kernel/asm-offsets.c b/arch/hexagon/kernel/asm-offsets.c
+index 03a7063f94561..50eea9fa6f137 100644
+--- a/arch/hexagon/kernel/asm-offsets.c
++++ b/arch/hexagon/kernel/asm-offsets.c
+@@ -8,6 +8,7 @@
+ *
+ * Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/compat.h>
+ #include <linux/types.h>
+diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c
+index bee9f7a3108f0..d20d71d4bcae6 100644
+--- a/arch/loongarch/kernel/asm-offsets.c
++++ b/arch/loongarch/kernel/asm-offsets.c
+@@ -4,6 +4,8 @@
+ *
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
++#define COMPILE_OFFSETS
++
+ #include <linux/types.h>
+ #include <linux/sched.h>
+ #include <linux/mm.h>
+diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c
+index 906d732305374..67a1990f9d748 100644
+--- a/arch/m68k/kernel/asm-offsets.c
++++ b/arch/m68k/kernel/asm-offsets.c
+@@ -9,6 +9,7 @@
+ * #defines from the assembly-language output.
+ */
+
++#define COMPILE_OFFSETS
+ #define ASM_OFFSETS_C
+
+ #include <linux/stddef.h>
+diff --git a/arch/microblaze/kernel/asm-offsets.c b/arch/microblaze/kernel/asm-offsets.c
+index 104c3ac5f30c8..b4b67d58e7f6a 100644
+--- a/arch/microblaze/kernel/asm-offsets.c
++++ b/arch/microblaze/kernel/asm-offsets.c
+@@ -7,6 +7,7 @@
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/init.h>
+ #include <linux/stddef.h>
+diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
+index cb1045ebab062..22c99a2cd5707 100644
+--- a/arch/mips/kernel/asm-offsets.c
++++ b/arch/mips/kernel/asm-offsets.c
+@@ -9,6 +9,8 @@
+ * Kevin Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
+ * Copyright (C) 2000 MIPS Technologies, Inc.
+ */
++#define COMPILE_OFFSETS
++
+ #include <linux/compat.h>
+ #include <linux/types.h>
+ #include <linux/sched.h>
+diff --git a/arch/nios2/kernel/asm-offsets.c b/arch/nios2/kernel/asm-offsets.c
+index e3d9b7b6fb48a..88190b503ce5d 100644
+--- a/arch/nios2/kernel/asm-offsets.c
++++ b/arch/nios2/kernel/asm-offsets.c
+@@ -2,6 +2,7 @@
+ /*
+ * Copyright (C) 2011 Tobias Klauser <tklauser@distanz.ch>
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/stddef.h>
+ #include <linux/sched.h>
+diff --git a/arch/openrisc/kernel/asm-offsets.c b/arch/openrisc/kernel/asm-offsets.c
+index 710651d5aaae1..3cc826f2216b1 100644
+--- a/arch/openrisc/kernel/asm-offsets.c
++++ b/arch/openrisc/kernel/asm-offsets.c
+@@ -18,6 +18,7 @@
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/signal.h>
+ #include <linux/sched.h>
+diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
+index 757816a7bd4b2..9abfe65492c65 100644
+--- a/arch/parisc/kernel/asm-offsets.c
++++ b/arch/parisc/kernel/asm-offsets.c
+@@ -13,6 +13,7 @@
+ * Copyright (C) 2002 Randolph Chung <tausq with parisc-linux.org>
+ * Copyright (C) 2003 James Bottomley <jejb at parisc-linux.org>
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/types.h>
+ #include <linux/sched.h>
+diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
+index 131a8cc10dbe8..cbeeda45c00a2 100644
+--- a/arch/powerpc/kernel/asm-offsets.c
++++ b/arch/powerpc/kernel/asm-offsets.c
+@@ -8,6 +8,7 @@
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/compat.h>
+ #include <linux/signal.h>
+diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
+index c2f3129a8e5cf..05c6152a65310 100644
+--- a/arch/riscv/kernel/asm-offsets.c
++++ b/arch/riscv/kernel/asm-offsets.c
+@@ -3,6 +3,7 @@
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/kbuild.h>
+ #include <linux/mm.h>
+diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
+index 5529248d84fb8..3cfc4939033c9 100644
+--- a/arch/s390/kernel/asm-offsets.c
++++ b/arch/s390/kernel/asm-offsets.c
+@@ -4,6 +4,7 @@
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
++#define COMPILE_OFFSETS
+
+ #define ASM_OFFSETS_C
+
+diff --git a/arch/sh/kernel/asm-offsets.c b/arch/sh/kernel/asm-offsets.c
+index a0322e8328456..429b6a7631468 100644
+--- a/arch/sh/kernel/asm-offsets.c
++++ b/arch/sh/kernel/asm-offsets.c
+@@ -8,6 +8,7 @@
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/stddef.h>
+ #include <linux/types.h>
+diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c
+index 3d9b9855dce91..6e660bde48dd8 100644
+--- a/arch/sparc/kernel/asm-offsets.c
++++ b/arch/sparc/kernel/asm-offsets.c
+@@ -10,6 +10,7 @@
+ *
+ * On sparc, thread_info data is static and TI_XXX offsets are computed by hand.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/sched.h>
+ #include <linux/mm_types.h>
+diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c
+index 1fb12235ab9c8..a69873aa697f4 100644
+--- a/arch/um/kernel/asm-offsets.c
++++ b/arch/um/kernel/asm-offsets.c
+@@ -1 +1,3 @@
++#define COMPILE_OFFSETS
++
+ #include <sysdep/kernel-offsets.h>
+diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
+index da38de20ae598..cfbced95e944a 100644
+--- a/arch/xtensa/kernel/asm-offsets.c
++++ b/arch/xtensa/kernel/asm-offsets.c
+@@ -11,6 +11,7 @@
+ *
+ * Chris Zankel <chris@zankel.net>
+ */
++#define COMPILE_OFFSETS
+
+ #include <asm/processor.h>
+ #include <asm/coprocessor.h>
+--
+2.51.0
+
--- /dev/null
+From 9c66e5dad6997a22b7ffbbecaed782d5db5c2542 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Aug 2025 17:04:07 -0400
+Subject: audit: record fanotify event regardless of presence of rules
+
+From: Richard Guy Briggs <rgb@redhat.com>
+
+[ Upstream commit ce8370e2e62a903e18be7dd0e0be2eee079501e1 ]
+
+When no audit rules are in place, fanotify event results are
+unconditionally dropped due to an explicit check for the existence of
+any audit rules. Given this is a report from another security
+sub-system, allow it to be recorded regardless of the existence of any
+audit rules.
+
+To test, install and run the fapolicyd daemon with default config. Then
+as an unprivileged user, create and run a very simple binary that should
+be denied. Then check for an event with
+ ausearch -m FANOTIFY -ts recent
+
+Link: https://issues.redhat.com/browse/RHEL-9065
+Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/audit.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/linux/audit.h b/include/linux/audit.h
+index a394614ccd0b8..e3f06eba9c6e6 100644
+--- a/include/linux/audit.h
++++ b/include/linux/audit.h
+@@ -527,7 +527,7 @@ static inline void audit_log_kern_module(const char *name)
+
+ static inline void audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar)
+ {
+- if (!audit_dummy_context())
++ if (audit_enabled)
+ __audit_fanotify(response, friar);
+ }
+
+--
+2.51.0
+
--- /dev/null
+From 88d6ab1b772266a93ddd13ce9c15485bedc4322c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Sep 2025 17:43:04 +0100
+Subject: btrfs: abort transaction if we fail to update inode in log replay dir
+ fixup
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 5a0565cad3ef7cbf4cf43d1dd1e849b156205292 ]
+
+If we fail to update the inode at link_to_fixup_dir(), we don't abort the
+transaction and propagate the error up the call chain, which makes it hard
+to pinpoint the error to the inode update. So abort the transaction if the
+inode update call fails, so that if it happens we know immediately.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-log.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index b43a7c0c7cb7a..173e13e1d5b88 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -1778,6 +1778,8 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
+ else
+ inc_nlink(vfs_inode);
+ ret = btrfs_update_inode(trans, inode);
++ if (ret)
++ btrfs_abort_transaction(trans, ret);
+ } else if (ret == -EEXIST) {
+ ret = 0;
+ }
+--
+2.51.0
+
--- /dev/null
+From 9bdeac056de0e4eb2dfe729346d08037d6e5c175 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jul 2025 15:49:31 +0100
+Subject: btrfs: abort transaction in the process_one_buffer() log tree walk
+ callback
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit e6dd405b6671b9753b98d8bdf76f8f0ed36c11cd ]
+
+In the process_one_buffer() log tree walk callback we return errors to the
+log tree walk caller and then the caller aborts the transaction, if we
+have one, or turns the fs into error state if we don't have one. While
+this reduces code, it makes it harder to figure out where exactly an error
+came from. So add the transaction aborts after every failure inside the
+process_one_buffer() callback, so that it is easier to figure out why
+failures happen.
+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-log.c | 20 ++++++++++++++++----
+ 1 file changed, 16 insertions(+), 4 deletions(-)
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index f3ca530f032df..1c207a6d71ecf 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -350,6 +350,7 @@ static int process_one_buffer(struct btrfs_root *log,
+ struct extent_buffer *eb,
+ struct walk_control *wc, u64 gen, int level)
+ {
++ struct btrfs_trans_handle *trans = wc->trans;
+ struct btrfs_fs_info *fs_info = log->fs_info;
+ int ret = 0;
+
+@@ -364,18 +365,29 @@ static int process_one_buffer(struct btrfs_root *log,
+ };
+
+ ret = btrfs_read_extent_buffer(eb, &check);
+- if (ret)
++ if (ret) {
++ if (trans)
++ btrfs_abort_transaction(trans, ret);
++ else
++ btrfs_handle_fs_error(fs_info, ret, NULL);
+ return ret;
++ }
+ }
+
+ if (wc->pin) {
+- ret = btrfs_pin_extent_for_log_replay(wc->trans, eb);
+- if (ret)
++ ASSERT(trans != NULL);
++ ret = btrfs_pin_extent_for_log_replay(trans, eb);
++ if (ret) {
++ btrfs_abort_transaction(trans, ret);
+ return ret;
++ }
+
+ if (btrfs_buffer_uptodate(eb, gen, 0) &&
+- btrfs_header_level(eb) == 0)
++ btrfs_header_level(eb) == 0) {
+ ret = btrfs_exclude_logged_extents(eb);
++ if (ret)
++ btrfs_abort_transaction(trans, ret);
++ }
+ }
+ return ret;
+ }
+--
+2.51.0
+
--- /dev/null
+From 81186743181cc300f669cb0bfb781e773fa6ea6b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jul 2025 14:56:11 +0100
+Subject: btrfs: abort transaction on specific error places when walking log
+ tree
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 6ebd726b104fa99d47c0d45979e6a6109844ac18 ]
+
+We do several things while walking a log tree (for replaying and for
+freeing a log tree) like reading extent buffers and cleaning them up,
+but we don't immediately abort the transaction, or turn the fs into an
+error state, when one of these things fails. Instead we do the transaction
+abort or turn the fs into error state in the caller of the entry point
+function that walks a log tree - walk_log_tree() - which means we don't
+get to know exactly where an error came from.
+
+Improve on this by doing a transaction abort / turn fs into error state
+after each such failure so that when it happens we have a better
+understanding where the failure comes from. This deliberately leaves
+the transaction abort / turn fs into error state in the callers of
+walk_log_tree() as to ensure we don't get into an inconsistent state in
+case we forget to do it deeper in call chain. It also deliberately does
+not do it after errors from the calls to the callback defined in
+struct walk_control::process_func(), as we will do it later on another
+patch.
+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-log.c | 33 ++++++++++++++++++++++++++++-----
+ 1 file changed, 28 insertions(+), 5 deletions(-)
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 0022ad003791f..f3ca530f032df 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -2612,15 +2612,24 @@ static int unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start)
+ static int clean_log_buffer(struct btrfs_trans_handle *trans,
+ struct extent_buffer *eb)
+ {
++ int ret;
++
+ btrfs_tree_lock(eb);
+ btrfs_clear_buffer_dirty(trans, eb);
+ wait_on_extent_buffer_writeback(eb);
+ btrfs_tree_unlock(eb);
+
+- if (trans)
+- return btrfs_pin_reserved_extent(trans, eb);
++ if (trans) {
++ ret = btrfs_pin_reserved_extent(trans, eb);
++ if (ret)
++ btrfs_abort_transaction(trans, ret);
++ return ret;
++ }
+
+- return unaccount_log_buffer(eb->fs_info, eb->start);
++ ret = unaccount_log_buffer(eb->fs_info, eb->start);
++ if (ret)
++ btrfs_handle_fs_error(eb->fs_info, ret, NULL);
++ return ret;
+ }
+
+ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
+@@ -2656,8 +2665,14 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
+ next = btrfs_find_create_tree_block(fs_info, bytenr,
+ btrfs_header_owner(cur),
+ *level - 1);
+- if (IS_ERR(next))
+- return PTR_ERR(next);
++ if (IS_ERR(next)) {
++ ret = PTR_ERR(next);
++ if (trans)
++ btrfs_abort_transaction(trans, ret);
++ else
++ btrfs_handle_fs_error(fs_info, ret, NULL);
++ return ret;
++ }
+
+ if (*level == 1) {
+ ret = wc->process_func(root, next, wc, ptr_gen,
+@@ -2672,6 +2687,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
+ ret = btrfs_read_extent_buffer(next, &check);
+ if (ret) {
+ free_extent_buffer(next);
++ if (trans)
++ btrfs_abort_transaction(trans, ret);
++ else
++ btrfs_handle_fs_error(fs_info, ret, NULL);
+ return ret;
+ }
+
+@@ -2687,6 +2706,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
+ ret = btrfs_read_extent_buffer(next, &check);
+ if (ret) {
+ free_extent_buffer(next);
++ if (trans)
++ btrfs_abort_transaction(trans, ret);
++ else
++ btrfs_handle_fs_error(fs_info, ret, NULL);
+ return ret;
+ }
+
+--
+2.51.0
+
--- /dev/null
+From c6fd6b1021ea066aa250a87df9a586cacfc01851 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Aug 2025 12:10:28 +0100
+Subject: btrfs: always drop log root tree reference in btrfs_replay_log()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 2f5b8095ea47b142c56c09755a8b1e14145a2d30 ]
+
+Currently we have this odd behaviour:
+
+1) At btrfs_replay_log() we drop the reference of the log root tree if
+ the call to btrfs_recover_log_trees() failed;
+
+2) But if the call to btrfs_recover_log_trees() did not fail, we don't
+ drop the reference in btrfs_replay_log() - we expect that
+ btrfs_recover_log_trees() does it in case it returns success.
+
+Let's simplify this and make btrfs_replay_log() always drop the reference
+on the log root tree, not only this simplifies code as it's what makes
+sense since it's btrfs_replay_log() who grabbed the reference in the first
+place.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/disk-io.c | 2 +-
+ fs/btrfs/tree-log.c | 1 -
+ 2 files changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index e655fa3bfd9be..3a73d218af464 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -2100,10 +2100,10 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
+
+ /* returns with log_tree_root freed on success */
+ ret = btrfs_recover_log_trees(log_tree_root);
++ btrfs_put_root(log_tree_root);
+ if (ret) {
+ btrfs_handle_fs_error(fs_info, ret,
+ "Failed to recover log tree");
+- btrfs_put_root(log_tree_root);
+ return ret;
+ }
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 1c207a6d71ecf..63b14005f5066 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -7457,7 +7457,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
+
+ log_root_tree->log_root = NULL;
+ clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
+- btrfs_put_root(log_root_tree);
+
+ return 0;
+ error:
+--
+2.51.0
+
--- /dev/null
+From 8110fe15ebe327b95bcf726f399a81011b0fa47e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Sep 2025 17:01:44 +0200
+Subject: btrfs: scrub: replace max_t()/min_t() with clamp() in
+ scrub_throttle_dev_io()
+
+From: Thorsten Blum <thorsten.blum@linux.dev>
+
+[ Upstream commit a7f3dfb8293c4cee99743132d69863a92e8f4875 ]
+
+Replace max_t() followed by min_t() with a single clamp().
+
+As was pointed out by David Laight in
+https://lore.kernel.org/linux-btrfs/20250906122458.75dfc8f0@pumpkin/
+the calculation may overflow u32 when the input value is too large, so
+clamp_t() is not used. In practice the expected values are in the range of
+megabytes to gigabytes (throughput limit), so the bug would not happen.
+
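+For illustration only, a minimal sketch of the two variants (assuming
+bwlimit is a u64, as in scrub_throttle_dev_io()):
+
+  u32 div;
+
+  /* rejected: clamp_t() casts the division result to u32 before comparing,
+   * so a huge bwlimit could be truncated first */
+  div = clamp_t(u32, bwlimit / (16 * 1024 * 1024), 1, 64);
+
+  /* used instead: compare in the natural (u64) type, then assign */
+  div = clamp(bwlimit / (16 * 1024 * 1024), 1, 64);
+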
+Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ Use clamp() and add explanation. ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/scrub.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
+index 3fcc7c092c5ec..9a6e0b047d3b6 100644
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -1270,8 +1270,7 @@ static void scrub_throttle_dev_io(struct scrub_ctx *sctx, struct btrfs_device *d
+ * Slice is divided into intervals when the IO is submitted, adjust by
+ * bwlimit and maximum of 64 intervals.
+ */
+- div = max_t(u32, 1, (u32)(bwlimit / (16 * 1024 * 1024)));
+- div = min_t(u32, 64, div);
++ div = clamp(bwlimit / (16 * 1024 * 1024), 1, 64);
+
+ /* Start new epoch, set deadline */
+ now = ktime_get();
+--
+2.51.0
+
--- /dev/null
+From 277716e473637786bb6a577d628c45e2e3465378 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Sep 2025 08:34:05 +0930
+Subject: btrfs: tree-checker: add inode extref checks
+
+From: Qu Wenruo <wqu@suse.com>
+
+[ Upstream commit aab9458b9f0019e97fae394c2d6d9d1a03addfb3 ]
+
+Like inode refs, inode extrefs have a variable-length name, which means
+we have to do a proper check to make sure neither the header nor the name
+can exceed the item limits.
+
+The check itself is very similar to check_inode_ref(), just a different
+structure (btrfs_inode_extref vs btrfs_inode_ref).
+
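+For reference, the item payload being validated is a sequence of
+variable-sized records; a rough sketch of the on-disk record (which is why
+both the fixed header and the name following it must be bounds-checked
+against the item end):
+
+  struct btrfs_inode_extref {
+          __le64 parent_objectid;
+          __le64 index;
+          __le16 name_len;
+          __u8   name[];   /* name_len bytes follow the fixed header */
+  } __attribute__((packed));
+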
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-checker.c | 37 +++++++++++++++++++++++++++++++++++++
+ 1 file changed, 37 insertions(+)
+
+diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
+index 14f96d217e6e1..986b1612d5b04 100644
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -183,6 +183,7 @@ static bool check_prev_ino(struct extent_buffer *leaf,
+ /* Only these key->types needs to be checked */
+ ASSERT(key->type == BTRFS_XATTR_ITEM_KEY ||
+ key->type == BTRFS_INODE_REF_KEY ||
++ key->type == BTRFS_INODE_EXTREF_KEY ||
+ key->type == BTRFS_DIR_INDEX_KEY ||
+ key->type == BTRFS_DIR_ITEM_KEY ||
+ key->type == BTRFS_EXTENT_DATA_KEY);
+@@ -1770,6 +1771,39 @@ static int check_inode_ref(struct extent_buffer *leaf,
+ return 0;
+ }
+
++static int check_inode_extref(struct extent_buffer *leaf,
++ struct btrfs_key *key, struct btrfs_key *prev_key,
++ int slot)
++{
++ unsigned long ptr = btrfs_item_ptr_offset(leaf, slot);
++ unsigned long end = ptr + btrfs_item_size(leaf, slot);
++
++ if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
++ return -EUCLEAN;
++
++ while (ptr < end) {
++ struct btrfs_inode_extref *extref = (struct btrfs_inode_extref *)ptr;
++ u16 namelen;
++
++ if (unlikely(ptr + sizeof(*extref) > end)) {
++ inode_ref_err(leaf, slot,
++ "inode extref overflow, ptr %lu end %lu inode_extref size %zu",
++ ptr, end, sizeof(*extref));
++ return -EUCLEAN;
++ }
++
++ namelen = btrfs_inode_extref_name_len(leaf, extref);
++ if (unlikely(ptr + sizeof(*extref) + namelen > end)) {
++ inode_ref_err(leaf, slot,
++ "inode extref overflow, ptr %lu end %lu namelen %u",
++ ptr, end, namelen);
++ return -EUCLEAN;
++ }
++ ptr += sizeof(*extref) + namelen;
++ }
++ return 0;
++}
++
+ static int check_raid_stripe_extent(const struct extent_buffer *leaf,
+ const struct btrfs_key *key, int slot)
+ {
+@@ -1881,6 +1915,9 @@ static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf,
+ case BTRFS_INODE_REF_KEY:
+ ret = check_inode_ref(leaf, key, prev_key, slot);
+ break;
++ case BTRFS_INODE_EXTREF_KEY:
++ ret = check_inode_extref(leaf, key, prev_key, slot);
++ break;
+ case BTRFS_BLOCK_GROUP_ITEM_KEY:
+ ret = check_block_group_item(leaf, key, slot);
+ break;
+--
+2.51.0
+
--- /dev/null
+From bbe02836089d717f5f340b95731ecc35b7434a60 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Aug 2025 17:46:18 +0100
+Subject: btrfs: use level argument in log tree walk callback
+ replay_one_buffer()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 6cb7f0b8c9b0d6a35682335fea88bd26f089306f ]
+
+We already have the extent buffer's level in an argument, so there's no need
+to first ensure the extent buffer's data is loaded (by calling
+btrfs_read_extent_buffer()) and then call btrfs_header_level() to check
+the level. So use the level argument and do the check before calling
+btrfs_read_extent_buffer().
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-log.c | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 63b14005f5066..b43a7c0c7cb7a 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -2443,15 +2443,13 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
+ int i;
+ int ret;
+
++ if (level != 0)
++ return 0;
++
+ ret = btrfs_read_extent_buffer(eb, &check);
+ if (ret)
+ return ret;
+
+- level = btrfs_header_level(eb);
+-
+- if (level != 0)
+- return 0;
+-
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+--
+2.51.0
+
--- /dev/null
+From b6543dc88730987fe194ba4d74a9fb0de649075c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Sep 2025 12:09:14 +0100
+Subject: btrfs: use smp_mb__after_atomic() when forcing COW in
+ create_pending_snapshot()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 45c222468d33202c07c41c113301a4b9c8451b8f ]
+
+After setting the BTRFS_ROOT_FORCE_COW flag on the root we are doing a
+full write barrier, smp_wmb(), but we don't need to, all we need is a
+smp_mb__after_atomic(). The use of the smp_wmb() is from the old days
+when we didn't use a bit and used instead an int field in the root to
+signal if cow is forced. After the int field was changed to a bit in
+the root's state (flags field), we forgot to update the memory barrier
+in create_pending_snapshot() to smp_mb__after_atomic(), but we did the
+change in commit_fs_roots() after clearing BTRFS_ROOT_FORCE_COW. That
+happened in commit 27cdeb7096b8 ("Btrfs: use bitfield instead of integer
+data type for the some variants in btrfs_root"). On the reader side, in
+should_cow_block(), we also use the counterpart smp_mb__before_atomic()
+which generates further confusion.
+
+So change the smp_wmb() to smp_mb__after_atomic(). In fact we don't
+even need any barrier at all since create_pending_snapshot() is called
+in the critical section of a transaction commit and therefore no one
+can concurrently join/attach the transaction, or start a new one, until
+the transaction is unblocked. By the time someone starts a new transaction
+and enters should_cow_block(), a lot of implicit memory barriers already
+took place by having acquired several locks such as fs_info->trans_lock
+and extent buffer locks on the root node at least. Nevertheless, for
+consistency use smp_mb__after_atomic() after setting the force cow bit
+in create_pending_snapshot().
+
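+Reduced to a minimal sketch, the pairing described above (illustrative
+only, not part of the diff below):
+
+  /* writer, create_pending_snapshot() */
+  set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
+  smp_mb__after_atomic();
+
+  /* reader, should_cow_block() */
+  smp_mb__before_atomic();
+  if (test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
+          /* ... force COW of the block ... */;
+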
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/transaction.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index 1a029392eac52..f4dda72491feb 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -1810,7 +1810,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+ }
+ /* see comments in should_cow_block() */
+ set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
+- smp_wmb();
++ smp_mb__after_atomic();
+
+ btrfs_set_root_node(new_root_item, tmp);
+ /* record when the snapshot was created in key.offset */
+--
+2.51.0
+
--- /dev/null
+From b99faec603c7d9385177f7858dac67ed1d08c110 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jul 2025 11:13:15 +0900
+Subject: btrfs: zoned: refine extent allocator hint selection
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 0d703963d297964451783e1a0688ebdf74cd6151 ]
+
+The hint block group selection in the extent allocator is wrong in the
+first place, as it can select the dedicated data relocation block group for
+the normal data allocation.
+
+Since we separated the normal data space_info and the data relocation
+space_info, we can easily identify whether a block group is for data
+relocation or not. Do not choose it for the normal data allocation.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent-tree.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index bb3602059906d..7bab2512468d5 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -4299,7 +4299,8 @@ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
+ }
+
+ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
+- struct find_free_extent_ctl *ffe_ctl)
++ struct find_free_extent_ctl *ffe_ctl,
++ struct btrfs_space_info *space_info)
+ {
+ if (ffe_ctl->for_treelog) {
+ spin_lock(&fs_info->treelog_bg_lock);
+@@ -4323,6 +4324,7 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
+ u64 avail = block_group->zone_capacity - block_group->alloc_offset;
+
+ if (block_group_bits(block_group, ffe_ctl->flags) &&
++ block_group->space_info == space_info &&
+ avail >= ffe_ctl->num_bytes) {
+ ffe_ctl->hint_byte = block_group->start;
+ break;
+@@ -4344,7 +4346,7 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
+ return prepare_allocation_clustered(fs_info, ffe_ctl,
+ space_info, ins);
+ case BTRFS_EXTENT_ALLOC_ZONED:
+- return prepare_allocation_zoned(fs_info, ffe_ctl);
++ return prepare_allocation_zoned(fs_info, ffe_ctl, space_info);
+ default:
+ BUG();
+ }
+--
+2.51.0
+
--- /dev/null
+From 4a48cb581a19341097dba34421a1cb023b4d9e32 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Jul 2025 13:39:11 +0200
+Subject: btrfs: zoned: return error from btrfs_zone_finish_endio()
+
+From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+
+[ Upstream commit 3c44cd3c79fcb38a86836dea6ff8fec322a9e68c ]
+
+Now that btrfs_zone_finish_endio_workfn() is directly calling
+do_zone_finish(), the only caller of btrfs_zone_finish_endio() is
+btrfs_finish_one_ordered().
+
+btrfs_finish_one_ordered() already has error handling in-place so
+btrfs_zone_finish_endio() can return an error if the block group lookup
+fails.
+
+Also as btrfs_zone_finish_endio() already checks for zoned filesystems and
+returns early, there's no need to do this in the caller.
+
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/inode.c | 7 ++++---
+ fs/btrfs/zoned.c | 8 +++++---
+ fs/btrfs/zoned.h | 9 ++++++---
+ 3 files changed, 15 insertions(+), 9 deletions(-)
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 19c0ec9c327c1..e32dd4193aea1 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -3174,9 +3174,10 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
+ goto out;
+ }
+
+- if (btrfs_is_zoned(fs_info))
+- btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
+- ordered_extent->disk_num_bytes);
++ ret = btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
++ ordered_extent->disk_num_bytes);
++ if (ret)
++ goto out;
+
+ if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
+ truncated = true;
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 4966b4f5a7d24..64e0a5bf5f9a5 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -2384,16 +2384,17 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
+ return ret;
+ }
+
+-void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
++int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
+ {
+ struct btrfs_block_group *block_group;
+ u64 min_alloc_bytes;
+
+ if (!btrfs_is_zoned(fs_info))
+- return;
++ return 0;
+
+ block_group = btrfs_lookup_block_group(fs_info, logical);
+- ASSERT(block_group);
++ if (WARN_ON_ONCE(!block_group))
++ return -ENOENT;
+
+ /* No MIXED_BG on zoned btrfs. */
+ if (block_group->flags & BTRFS_BLOCK_GROUP_DATA)
+@@ -2410,6 +2411,7 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
+
+ out:
+ btrfs_put_block_group(block_group);
++ return 0;
+ }
+
+ static void btrfs_zone_finish_endio_workfn(struct work_struct *work)
+diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
+index 7612e65726053..f7171ab6ed71e 100644
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -83,7 +83,7 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
+ bool btrfs_zone_activate(struct btrfs_block_group *block_group);
+ int btrfs_zone_finish(struct btrfs_block_group *block_group);
+ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags);
+-void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
++int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
+ u64 length);
+ void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+ struct extent_buffer *eb);
+@@ -232,8 +232,11 @@ static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
+ return true;
+ }
+
+-static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
+- u64 logical, u64 length) { }
++static inline int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
++ u64 logical, u64 length)
++{
++ return 0;
++}
+
+ static inline void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+ struct extent_buffer *eb) { }
+--
+2.51.0
+
--- /dev/null
+From e566c5390297c67166b64ac35818038e9c63be55 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 Sep 2025 01:12:27 +0000
+Subject: cpuset: Use new excpus for nocpu error check when enabling root
+ partition
+
+From: Chen Ridong <chenridong@huawei.com>
+
+[ Upstream commit 59d5de3655698679ad8fd2cc82228de4679c4263 ]
+
+A previous patch fixed a bug where new_prs should be assigned before
+checking housekeeping conflicts. This patch addresses another potential
+issue: the nocpu error check currently uses xcpus, which has not been updated.
+Although no issue has been observed so far, the check should be performed
+using the new effective exclusive cpus.
+
+The comment has been removed because the function returns an error if
+nocpu checking fails, which is unrelated to the parent.
+
+Signed-off-by: Chen Ridong <chenridong@huawei.com>
+Reviewed-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cpuset.c | 6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index 25f9565f798d4..13eb986172499 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -1679,11 +1679,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
+ if (prstate_housekeeping_conflict(new_prs, xcpus))
+ return PERR_HKEEPING;
+
+- /*
+- * A parent can be left with no CPU as long as there is no
+- * task directly associated with the parent partition.
+- */
+- if (nocpu)
++ if (tasks_nocpu_error(parent, cs, xcpus))
+ return PERR_NOCPUS;
+
+ deleting = cpumask_and(tmp->delmask, xcpus, parent->effective_xcpus);
+--
+2.51.0
+
--- /dev/null
+From 71bc3080d5e3c82d57a7d368802c0eeb3fe35796 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Sep 2025 20:30:17 +0000
+Subject: EDAC/mc_sysfs: Increase legacy channel support to 16
+
+From: Avadhut Naik <avadhut.naik@amd.com>
+
+[ Upstream commit 6e1c2c6c2c40ce99e0d2633b212f43c702c1a002 ]
+
+Newer AMD systems can support up to 16 channels per EDAC "mc" device.
+These are detected by the EDAC module running on the device, and the
+current EDAC interface is appropriately enumerated.
+
+The legacy EDAC sysfs interface, however, provides device attributes for
+channels 0 through 11 only. Consequently, the last four channels, 12
+through 15, will not be enumerated and will not be visible through the
+legacy sysfs interface.
+
+Add additional device attributes to ensure that all 16 channels, if
+present, are enumerated by and visible through the legacy EDAC sysfs
+interface.
+
+Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/20250916203242.1281036-1-avadhut.naik@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/edac/edac_mc_sysfs.c | 24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
+index 4200aec048318..70dc0ee1cc08f 100644
+--- a/drivers/edac/edac_mc_sysfs.c
++++ b/drivers/edac/edac_mc_sysfs.c
+@@ -305,6 +305,14 @@ DEVICE_CHANNEL(ch10_dimm_label, S_IRUGO | S_IWUSR,
+ channel_dimm_label_show, channel_dimm_label_store, 10);
+ DEVICE_CHANNEL(ch11_dimm_label, S_IRUGO | S_IWUSR,
+ channel_dimm_label_show, channel_dimm_label_store, 11);
++DEVICE_CHANNEL(ch12_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 12);
++DEVICE_CHANNEL(ch13_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 13);
++DEVICE_CHANNEL(ch14_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 14);
++DEVICE_CHANNEL(ch15_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 15);
+
+ /* Total possible dynamic DIMM Label attribute file table */
+ static struct attribute *dynamic_csrow_dimm_attr[] = {
+@@ -320,6 +328,10 @@ static struct attribute *dynamic_csrow_dimm_attr[] = {
+ &dev_attr_legacy_ch9_dimm_label.attr.attr,
+ &dev_attr_legacy_ch10_dimm_label.attr.attr,
+ &dev_attr_legacy_ch11_dimm_label.attr.attr,
++ &dev_attr_legacy_ch12_dimm_label.attr.attr,
++ &dev_attr_legacy_ch13_dimm_label.attr.attr,
++ &dev_attr_legacy_ch14_dimm_label.attr.attr,
++ &dev_attr_legacy_ch15_dimm_label.attr.attr,
+ NULL
+ };
+
+@@ -348,6 +360,14 @@ DEVICE_CHANNEL(ch10_ce_count, S_IRUGO,
+ channel_ce_count_show, NULL, 10);
+ DEVICE_CHANNEL(ch11_ce_count, S_IRUGO,
+ channel_ce_count_show, NULL, 11);
++DEVICE_CHANNEL(ch12_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 12);
++DEVICE_CHANNEL(ch13_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 13);
++DEVICE_CHANNEL(ch14_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 14);
++DEVICE_CHANNEL(ch15_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 15);
+
+ /* Total possible dynamic ce_count attribute file table */
+ static struct attribute *dynamic_csrow_ce_count_attr[] = {
+@@ -363,6 +383,10 @@ static struct attribute *dynamic_csrow_ce_count_attr[] = {
+ &dev_attr_legacy_ch9_ce_count.attr.attr,
+ &dev_attr_legacy_ch10_ce_count.attr.attr,
+ &dev_attr_legacy_ch11_ce_count.attr.attr,
++ &dev_attr_legacy_ch12_ce_count.attr.attr,
++ &dev_attr_legacy_ch13_ce_count.attr.attr,
++ &dev_attr_legacy_ch14_ce_count.attr.attr,
++ &dev_attr_legacy_ch15_ce_count.attr.attr,
+ NULL
+ };
+
+--
+2.51.0
+
--- /dev/null
+From 52f1a40706a3d0a7fe1f00e045735ecd4d752fa9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Aug 2025 14:03:40 -0400
+Subject: perf: Have get_perf_callchain() return NULL if crosstask and user are
+ set
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+[ Upstream commit 153f9e74dec230f2e070e16fa061bc7adfd2c450 ]
+
+get_perf_callchain() doesn't support cross-task unwinding for user space
+stacks, so have it return NULL if both the crosstask and user arguments are
+set.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20250820180428.426423415@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/callchain.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
+index d1a09e6f514c9..49d87e6db553f 100644
+--- a/kernel/events/callchain.c
++++ b/kernel/events/callchain.c
+@@ -223,6 +223,10 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+ struct perf_callchain_entry_ctx ctx;
+ int rctx, start_entry_idx;
+
++ /* crosstask is not supported for user stacks */
++ if (crosstask && user && !kernel)
++ return NULL;
++
+ entry = get_callchain_entry(&rctx);
+ if (!entry)
+ return NULL;
+@@ -239,7 +243,7 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+ perf_callchain_kernel(&ctx, regs);
+ }
+
+- if (user) {
++ if (user && !crosstask) {
+ if (!user_mode(regs)) {
+ if (current->flags & (PF_KTHREAD | PF_USER_WORKER))
+ regs = NULL;
+@@ -248,9 +252,6 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+ }
+
+ if (regs) {
+- if (crosstask)
+- goto exit_put;
+-
+ if (add_mark)
+ perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
+
+@@ -260,7 +261,6 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+ }
+ }
+
+-exit_put:
+ put_callchain_entry(rctx);
+
+ return entry;
+--
+2.51.0
+
--- /dev/null
+From a41a16b8fdc8a8c7396811aa98fdc15b7d36235f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Aug 2025 14:03:43 -0400
+Subject: perf: Skip user unwind if the task is a kernel thread
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+[ Upstream commit 16ed389227651330879e17bd83d43bd234006722 ]
+
+If the task is not a user thread, there's no user stack to unwind.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20250820180428.930791978@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/core.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index 0339f60e34981..d6a86d8e9e59b 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -7847,7 +7847,8 @@ struct perf_callchain_entry *
+ perf_callchain(struct perf_event *event, struct pt_regs *regs)
+ {
+ bool kernel = !event->attr.exclude_callchain_kernel;
+- bool user = !event->attr.exclude_callchain_user;
++ bool user = !event->attr.exclude_callchain_user &&
++ !(current->flags & (PF_KTHREAD | PF_USER_WORKER));
+ /* Disallow cross-task user callchains. */
+ bool crosstask = event->ctx->task && event->ctx->task != current;
+ const u32 max_stack = event->attr.sample_max_stack;
+--
+2.51.0
+
--- /dev/null
+From 5513a16de55c2fc4c77969e1300f73c3962cc296 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Aug 2025 14:03:41 -0400
+Subject: perf: Use current->flags & PF_KTHREAD|PF_USER_WORKER instead of
+ current->mm == NULL
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+[ Upstream commit 90942f9fac05702065ff82ed0bade0d08168d4ea ]
+
+To determine if a task is a kernel thread or not, it is more reliable to
+use (current->flags & (PF_KTHREAD|PF_USER_WORKER)) than to rely on
+current->mm being NULL. That is because some kernel tasks (io_uring
+helpers) may have a mm field.
+
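+For illustration, the repeated test can be read as a predicate like the
+following (hypothetical helper, not introduced by this patch):
+
+  /* no user-space context to sample: kernel thread or user worker,
+   * regardless of whether ->mm happens to be set */
+  static inline bool perf_no_user_ctx(struct task_struct *p)
+  {
+          return p->flags & (PF_KTHREAD | PF_USER_WORKER);
+  }
+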
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20250820180428.592367294@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/callchain.c | 6 +++---
+ kernel/events/core.c | 4 ++--
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
+index 8a47e52a454f4..d1a09e6f514c9 100644
+--- a/kernel/events/callchain.c
++++ b/kernel/events/callchain.c
+@@ -241,10 +241,10 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+
+ if (user) {
+ if (!user_mode(regs)) {
+- if (current->mm)
+- regs = task_pt_regs(current);
+- else
++ if (current->flags & (PF_KTHREAD | PF_USER_WORKER))
+ regs = NULL;
++ else
++ regs = task_pt_regs(current);
+ }
+
+ if (regs) {
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index d60d48d482b01..0339f60e34981 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -7095,7 +7095,7 @@ static void perf_sample_regs_user(struct perf_regs *regs_user,
+ if (user_mode(regs)) {
+ regs_user->abi = perf_reg_abi(current);
+ regs_user->regs = regs;
+- } else if (!(current->flags & PF_KTHREAD)) {
++ } else if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+ perf_get_regs_user(regs_user, regs);
+ } else {
+ regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
+@@ -7735,7 +7735,7 @@ static u64 perf_virt_to_phys(u64 virt)
+ * Try IRQ-safe get_user_page_fast_only first.
+ * If failed, leave phys_addr as 0.
+ */
+- if (current->mm != NULL) {
++ if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+ struct page *p;
+
+ pagefault_disable();
+--
+2.51.0
+
--- /dev/null
+From 5d4fbce76c5677e241e63851a259064ad6435df3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Aug 2025 10:30:31 +0800
+Subject: perf/x86/intel: Add ICL_FIXED_0_ADAPTIVE bit into
+ INTEL_FIXED_BITS_MASK
+
+From: Dapeng Mi <dapeng1.mi@linux.intel.com>
+
+[ Upstream commit 2676dbf9f4fb7f6739d1207c0f1deaf63124642a ]
+
+ICL_FIXED_0_ADAPTIVE is missing from INTEL_FIXED_BITS_MASK, so add it.
+
+With the help of this new INTEL_FIXED_BITS_MASK, intel_pmu_enable_fixed()
+can be optimized: the old fixed counter control bits can be unconditionally
+cleared with INTEL_FIXED_BITS_MASK and then the new control bits can be set
+based on the new configuration.
+
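+A worked example with the definitions touched below: the new
+INTEL_FIXED_BITS_MASK is 0x10000000fULL (control bits 0-3 plus the adaptive
+bit 32), so for fixed counter 1:
+
+  intel_fixed_bits_by_idx(1, INTEL_FIXED_BITS_MASK)
+          == 0x10000000fULL << (1 * INTEL_FIXED_BITS_STRIDE)
+          == 0x10000000f0ULL
+
+i.e. clearing with this single mask now also clears the per-counter
+adaptive bit that previously had to be added to the mask separately.
+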
+Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
+Tested-by: Yi Lai <yi1.lai@intel.com>
+Link: https://lore.kernel.org/r/20250820023032.17128-7-dapeng1.mi@linux.intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/events/intel/core.c | 10 +++-------
+ arch/x86/include/asm/perf_event.h | 6 +++++-
+ arch/x86/kvm/pmu.h | 2 +-
+ 3 files changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
+index 36d8404f406de..acc0774519ce2 100644
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -2812,8 +2812,8 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
+ {
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+- u64 mask, bits = 0;
+ int idx = hwc->idx;
++ u64 bits = 0;
+
+ if (is_topdown_idx(idx)) {
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+@@ -2849,14 +2849,10 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
+
+ idx -= INTEL_PMC_IDX_FIXED;
+ bits = intel_fixed_bits_by_idx(idx, bits);
+- mask = intel_fixed_bits_by_idx(idx, INTEL_FIXED_BITS_MASK);
+-
+- if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) {
++ if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip)
+ bits |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE);
+- mask |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE);
+- }
+
+- cpuc->fixed_ctrl_val &= ~mask;
++ cpuc->fixed_ctrl_val &= ~intel_fixed_bits_by_idx(idx, INTEL_FIXED_BITS_MASK);
+ cpuc->fixed_ctrl_val |= bits;
+ }
+
+diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
+index aa351c4a20eee..c69b6498f6eaa 100644
+--- a/arch/x86/include/asm/perf_event.h
++++ b/arch/x86/include/asm/perf_event.h
+@@ -35,7 +35,6 @@
+ #define ARCH_PERFMON_EVENTSEL_EQ (1ULL << 36)
+ #define ARCH_PERFMON_EVENTSEL_UMASK2 (0xFFULL << 40)
+
+-#define INTEL_FIXED_BITS_MASK 0xFULL
+ #define INTEL_FIXED_BITS_STRIDE 4
+ #define INTEL_FIXED_0_KERNEL (1ULL << 0)
+ #define INTEL_FIXED_0_USER (1ULL << 1)
+@@ -47,6 +46,11 @@
+ #define ICL_EVENTSEL_ADAPTIVE (1ULL << 34)
+ #define ICL_FIXED_0_ADAPTIVE (1ULL << 32)
+
++#define INTEL_FIXED_BITS_MASK \
++ (INTEL_FIXED_0_KERNEL | INTEL_FIXED_0_USER | \
++ INTEL_FIXED_0_ANYTHREAD | INTEL_FIXED_0_ENABLE_PMI | \
++ ICL_FIXED_0_ADAPTIVE)
++
+ #define intel_fixed_bits_by_idx(_idx, _bits) \
+ ((_bits) << ((_idx) * INTEL_FIXED_BITS_STRIDE))
+
+diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
+index ad89d0bd60058..103604c4b33b5 100644
+--- a/arch/x86/kvm/pmu.h
++++ b/arch/x86/kvm/pmu.h
+@@ -13,7 +13,7 @@
+ #define MSR_IA32_MISC_ENABLE_PMU_RO_MASK (MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | \
+ MSR_IA32_MISC_ENABLE_BTS_UNAVAIL)
+
+-/* retrieve the 4 bits for EN and PMI out of IA32_FIXED_CTR_CTRL */
++/* retrieve a fixed counter bits out of IA32_FIXED_CTR_CTRL */
+ #define fixed_ctrl_field(ctrl_reg, idx) \
+ (((ctrl_reg) >> ((idx) * INTEL_FIXED_BITS_STRIDE)) & INTEL_FIXED_BITS_MASK)
+
+--
+2.51.0
+
--- /dev/null
+From cbcae6872c4865b170a63648115d9a0c9d6b8783 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Sep 2025 09:03:26 -1000
+Subject: sched_ext: Make qmap dump operation non-destructive
+
+From: Tejun Heo <tj@kernel.org>
+
+[ Upstream commit d452972858e5cfa4262320ab74fe8f016460b96f ]
+
+The qmap dump operation was destructively consuming queue entries while
+displaying them. As dump can be triggered anytime, this can easily lead to
+stalls. Add a temporary dump_store queue and modify the dump logic to pop
+entries, display them, and then restore them back to the original queue.
+This allows dump operations to be performed without affecting the
+scheduler's queue state.
+
+Note that if racing against new enqueues during dump, ordering can get
+mixed up, but this is acceptable for debugging purposes.
+
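+The stash-and-restore pattern, reduced to a sketch (the real code below
+also bounds both loops with bpf_repeat(4096)):
+
+  /* drain the FIFO into a scratch queue, printing as we go */
+  while (!bpf_map_pop_elem(fifo, &pid)) {
+          bpf_map_push_elem(&dump_store, &pid, 0);
+          scx_bpf_dump(" %d", pid);
+  }
+
+  /* put everything back; ordering vs. concurrent enqueues may change */
+  while (!bpf_map_pop_elem(&dump_store, &pid))
+          bpf_map_push_elem(fifo, &pid, 0);
+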
+Acked-by: Andrea Righi <arighi@nvidia.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/sched_ext/scx_qmap.bpf.c | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
+index 5d1f880d1149e..e952f525599bd 100644
+--- a/tools/sched_ext/scx_qmap.bpf.c
++++ b/tools/sched_ext/scx_qmap.bpf.c
+@@ -56,7 +56,8 @@ struct qmap {
+ queue1 SEC(".maps"),
+ queue2 SEC(".maps"),
+ queue3 SEC(".maps"),
+- queue4 SEC(".maps");
++ queue4 SEC(".maps"),
++ dump_store SEC(".maps");
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+@@ -578,11 +579,26 @@ void BPF_STRUCT_OPS(qmap_dump, struct scx_dump_ctx *dctx)
+ return;
+
+ scx_bpf_dump("QMAP FIFO[%d]:", i);
++
++ /*
++ * Dump can be invoked anytime and there is no way to iterate in
++ * a non-destructive way. Pop and store in dump_store and then
++ * restore afterwards. If racing against new enqueues, ordering
++ * can get mixed up.
++ */
+ bpf_repeat(4096) {
+ if (bpf_map_pop_elem(fifo, &pid))
+ break;
++ bpf_map_push_elem(&dump_store, &pid, 0);
+ scx_bpf_dump(" %d", pid);
+ }
++
++ bpf_repeat(4096) {
++ if (bpf_map_pop_elem(&dump_store, &pid))
++ break;
++ bpf_map_push_elem(fifo, &pid, 0);
++ }
++
+ scx_bpf_dump("\n");
+ }
+ }
+--
+2.51.0
+
--- /dev/null
+From 5d620bd1214b6de59a55396c50bcce763e0e69e6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 20 Jul 2025 13:21:30 +0200
+Subject: seccomp: passthrough uprobe systemcall without filtering
+
+From: Jiri Olsa <jolsa@kernel.org>
+
+[ Upstream commit 89d1d8434d246c96309a6068dfcf9e36dc61227b ]
+
+Add uprobe as another exception to the seccomp filter, alongside
+the uretprobe syscall.
+
+Same as uretprobe, the uprobe syscall is installed by the kernel as a
+replacement for the breakpoint exception; it is limited to the x86_64
+arch and isn't expected to ever be supported on i386.
+
+Signed-off-by: Jiri Olsa <jolsa@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Kees Cook <kees@kernel.org>
+Link: https://lore.kernel.org/r/20250720112133.244369-21-jolsa@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/seccomp.c | 32 +++++++++++++++++++++++++-------
+ 1 file changed, 25 insertions(+), 7 deletions(-)
+
+diff --git a/kernel/seccomp.c b/kernel/seccomp.c
+index 267b00005eaf2..1eac0d2b8ecbe 100644
+--- a/kernel/seccomp.c
++++ b/kernel/seccomp.c
+@@ -733,6 +733,26 @@ seccomp_prepare_user_filter(const char __user *user_filter)
+ }
+
+ #ifdef SECCOMP_ARCH_NATIVE
++static bool seccomp_uprobe_exception(struct seccomp_data *sd)
++{
++#if defined __NR_uretprobe || defined __NR_uprobe
++#ifdef SECCOMP_ARCH_COMPAT
++ if (sd->arch == SECCOMP_ARCH_NATIVE)
++#endif
++ {
++#ifdef __NR_uretprobe
++ if (sd->nr == __NR_uretprobe)
++ return true;
++#endif
++#ifdef __NR_uprobe
++ if (sd->nr == __NR_uprobe)
++ return true;
++#endif
++ }
++#endif
++ return false;
++}
++
+ /**
+ * seccomp_is_const_allow - check if filter is constant allow with given data
+ * @fprog: The BPF programs
+@@ -750,13 +770,8 @@ static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
+ return false;
+
+ /* Our single exception to filtering. */
+-#ifdef __NR_uretprobe
+-#ifdef SECCOMP_ARCH_COMPAT
+- if (sd->arch == SECCOMP_ARCH_NATIVE)
+-#endif
+- if (sd->nr == __NR_uretprobe)
+- return true;
+-#endif
++ if (seccomp_uprobe_exception(sd))
++ return true;
+
+ for (pc = 0; pc < fprog->len; pc++) {
+ struct sock_filter *insn = &fprog->filter[pc];
+@@ -1034,6 +1049,9 @@ static const int mode1_syscalls[] = {
+ __NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
+ #ifdef __NR_uretprobe
+ __NR_uretprobe,
++#endif
++#ifdef __NR_uprobe
++ __NR_uprobe,
+ #endif
+ -1, /* negative terminated */
+ };
+--
+2.51.0
+
net-sched-sch_qfq-fix-null-deref-in-agg_dequeue.patch
+audit-record-fanotify-event-regardless-of-presence-o.patch
+perf-x86-intel-add-icl_fixed_0_adaptive-bit-into-int.patch
+perf-use-current-flags-pf_kthread-pf_user_worker-ins.patch
+perf-have-get_perf_callchain-return-null-if-crosstas.patch
+perf-skip-user-unwind-if-the-task-is-a-kernel-thread.patch
+seccomp-passthrough-uprobe-systemcall-without-filter.patch
+x86-bugs-report-correct-retbleed-mitigation-status.patch
+x86-bugs-fix-reporting-of-lfence-retpoline.patch
+edac-mc_sysfs-increase-legacy-channel-support-to-16.patch
+cpuset-use-new-excpus-for-nocpu-error-check-when-ena.patch
+btrfs-abort-transaction-on-specific-error-places-whe.patch
+btrfs-abort-transaction-in-the-process_one_buffer-lo.patch
+btrfs-zoned-return-error-from-btrfs_zone_finish_endi.patch
+btrfs-zoned-refine-extent-allocator-hint-selection.patch
+btrfs-scrub-replace-max_t-min_t-with-clamp-in-scrub_.patch
+btrfs-always-drop-log-root-tree-reference-in-btrfs_r.patch
+btrfs-use-level-argument-in-log-tree-walk-callback-r.patch
+btrfs-abort-transaction-if-we-fail-to-update-inode-i.patch
+btrfs-tree-checker-add-inode-extref-checks.patch
+btrfs-use-smp_mb__after_atomic-when-forcing-cow-in-c.patch
+sched_ext-make-qmap-dump-operation-non-destructive.patch
+arch-add-the-macro-compile_offsets-to-all-the-asm-of.patch
--- /dev/null
+From a036c27a3f7cf40ed8c2a1810edd5984a792ad7c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Sep 2025 08:47:05 -0500
+Subject: x86/bugs: Fix reporting of LFENCE retpoline
+
+From: David Kaplan <david.kaplan@amd.com>
+
+[ Upstream commit d1cc1baef67ac6c09b74629ca053bf3fb812f7dc ]
+
+The LFENCE retpoline mitigation is not secure but the kernel prints
+inconsistent messages about this fact. The dmesg log says 'Mitigation:
+LFENCE', implying the system is mitigated. But sysfs reports 'Vulnerable:
+LFENCE' implying the system (correctly) is not mitigated.
+
+Fix this by printing a consistent 'Vulnerable: LFENCE' string everywhere
+when this mitigation is selected.
+
+Signed-off-by: David Kaplan <david.kaplan@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/20250915134706.3201818-1-david.kaplan@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 0c16457e06543..939401b5d2ef0 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1598,7 +1598,7 @@ spectre_v2_user_select_mitigation(void)
+ static const char * const spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+ [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines",
+- [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE",
++ [SPECTRE_V2_LFENCE] = "Vulnerable: LFENCE",
+ [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced / Automatic IBRS",
+ [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced / Automatic IBRS + LFENCE",
+ [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced / Automatic IBRS + Retpolines",
+@@ -3251,9 +3251,6 @@ static const char *spectre_bhi_state(void)
+
+ static ssize_t spectre_v2_show_state(char *buf)
+ {
+- if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+- return sysfs_emit(buf, "Vulnerable: LFENCE\n");
+-
+ if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+ return sysfs_emit(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
+
+--
+2.51.0
+
--- /dev/null
+From f6920d5581ee7b7a5f0f69ff94e67cf03510b89e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Sep 2025 08:47:06 -0500
+Subject: x86/bugs: Report correct retbleed mitigation status
+
+From: David Kaplan <david.kaplan@amd.com>
+
+[ Upstream commit 930f2361fe542a00de9ce6070b1b6edb976f1165 ]
+
+On Intel CPUs, the default retbleed mitigation is IBRS/eIBRS but this
+requires that a similar spectre_v2 mitigation is applied. If the user
+selects a different spectre_v2 mitigation (like spectre_v2=retpoline) a
+warning is printed but sysfs will still report 'Mitigation: IBRS' or
+'Mitigation: Enhanced IBRS'. This is incorrect because retbleed is not
+mitigated, and IBRS is not actually set.
+
+Fix this by choosing RETBLEED_MITIGATION_NONE in this scenario so the
+kernel correctly reports the system as vulnerable to retbleed.
+
+Signed-off-by: David Kaplan <david.kaplan@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/20250915134706.3201818-1-david.kaplan@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index f3cb559a598df..0c16457e06543 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1186,8 +1186,10 @@ static void __init retbleed_select_mitigation(void)
+ retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
+ break;
+ default:
+- if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF)
++ if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) {
+ pr_err(RETBLEED_INTEL_MSG);
++ retbleed_mitigation = RETBLEED_MITIGATION_NONE;
++ }
+ }
+ }
+
+--
+2.51.0
+
--- /dev/null
+From 09755c95623ff205c25cd78c7f08e57b187626b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Sep 2025 14:09:13 +0800
+Subject: arch: Add the macro COMPILE_OFFSETS to all the asm-offsets.c
+
+From: Menglong Dong <menglong8.dong@gmail.com>
+
+[ Upstream commit 35561bab768977c9e05f1f1a9bc00134c85f3e28 ]
+
+The include/generated/asm-offsets.h is generated by Kbuild during
+compilation from arch/SRCARCH/kernel/asm-offsets.c. When we want to
+generate another similar offsets header file, a circular dependency can
+arise.
+
+For example, say we want to generate an offsets file
+include/generated/test.h, which is included in include/sched/sched.h. If we
+generate asm-offsets.h first, it will fail, as include/sched/sched.h is
+included in asm-offsets.c and include/generated/test.h doesn't exist yet; if
+we generate test.h first, it can't succeed either, as
+include/generated/asm-offsets.h is included by it.
+
+On x86_64, the macro COMPILE_OFFSETS is used to avoid such a circular
+dependency: we can generate asm-offsets.h first, and if COMPILE_OFFSETS is
+defined, we don't include "generated/test.h".
+
+Define the macro COMPILE_OFFSETS in all the asm-offsets.c files for this
+purpose.
+
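+A minimal sketch of the guard, using the hypothetical test.h from the
+example above:
+
+  /* include/sched/sched.h (illustrative) */
+  #ifndef COMPILE_OFFSETS
+  #include <generated/test.h>
+  #endif
+
+  /* arch/SRCARCH/kernel/asm-offsets.c */
+  #define COMPILE_OFFSETS
+
+With COMPILE_OFFSETS defined while asm-offsets.c is being compiled, the
+not-yet-generated test.h is skipped and the cycle is broken.
+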
+Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/alpha/kernel/asm-offsets.c | 1 +
+ arch/arc/kernel/asm-offsets.c | 1 +
+ arch/arm/kernel/asm-offsets.c | 2 ++
+ arch/arm64/kernel/asm-offsets.c | 1 +
+ arch/csky/kernel/asm-offsets.c | 1 +
+ arch/hexagon/kernel/asm-offsets.c | 1 +
+ arch/loongarch/kernel/asm-offsets.c | 2 ++
+ arch/m68k/kernel/asm-offsets.c | 1 +
+ arch/microblaze/kernel/asm-offsets.c | 1 +
+ arch/mips/kernel/asm-offsets.c | 2 ++
+ arch/nios2/kernel/asm-offsets.c | 1 +
+ arch/openrisc/kernel/asm-offsets.c | 1 +
+ arch/parisc/kernel/asm-offsets.c | 1 +
+ arch/powerpc/kernel/asm-offsets.c | 1 +
+ arch/riscv/kernel/asm-offsets.c | 1 +
+ arch/s390/kernel/asm-offsets.c | 1 +
+ arch/sh/kernel/asm-offsets.c | 1 +
+ arch/sparc/kernel/asm-offsets.c | 1 +
+ arch/um/kernel/asm-offsets.c | 2 ++
+ arch/xtensa/kernel/asm-offsets.c | 1 +
+ 20 files changed, 24 insertions(+)
+
+diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c
+index e9dad60b147f3..1ebb058904992 100644
+--- a/arch/alpha/kernel/asm-offsets.c
++++ b/arch/alpha/kernel/asm-offsets.c
+@@ -4,6 +4,7 @@
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/types.h>
+ #include <linux/stddef.h>
+diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
+index f77deb7991757..2978da85fcb65 100644
+--- a/arch/arc/kernel/asm-offsets.c
++++ b/arch/arc/kernel/asm-offsets.c
+@@ -2,6 +2,7 @@
+ /*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/sched.h>
+ #include <linux/mm.h>
+diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
+index 123f4a8ef4466..2101938d27fcb 100644
+--- a/arch/arm/kernel/asm-offsets.c
++++ b/arch/arm/kernel/asm-offsets.c
+@@ -7,6 +7,8 @@
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
++#define COMPILE_OFFSETS
++
+ #include <linux/compiler.h>
+ #include <linux/sched.h>
+ #include <linux/mm.h>
+diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
+index 30d4bbe68661f..b6367ff3a49ca 100644
+--- a/arch/arm64/kernel/asm-offsets.c
++++ b/arch/arm64/kernel/asm-offsets.c
+@@ -6,6 +6,7 @@
+ * 2001-2002 Keith Owens
+ * Copyright (C) 2012 ARM Ltd.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/arm_sdei.h>
+ #include <linux/sched.h>
+diff --git a/arch/csky/kernel/asm-offsets.c b/arch/csky/kernel/asm-offsets.c
+index d1e9035794733..5525c8e7e1d9e 100644
+--- a/arch/csky/kernel/asm-offsets.c
++++ b/arch/csky/kernel/asm-offsets.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0
+ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
++#define COMPILE_OFFSETS
+
+ #include <linux/sched.h>
+ #include <linux/kernel_stat.h>
+diff --git a/arch/hexagon/kernel/asm-offsets.c b/arch/hexagon/kernel/asm-offsets.c
+index 03a7063f94561..50eea9fa6f137 100644
+--- a/arch/hexagon/kernel/asm-offsets.c
++++ b/arch/hexagon/kernel/asm-offsets.c
+@@ -8,6 +8,7 @@
+ *
+ * Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/compat.h>
+ #include <linux/types.h>
+diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c
+index db1e4bb26b6a0..3017c71576009 100644
+--- a/arch/loongarch/kernel/asm-offsets.c
++++ b/arch/loongarch/kernel/asm-offsets.c
+@@ -4,6 +4,8 @@
+ *
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
++#define COMPILE_OFFSETS
++
+ #include <linux/types.h>
+ #include <linux/sched.h>
+ #include <linux/mm.h>
+diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c
+index 906d732305374..67a1990f9d748 100644
+--- a/arch/m68k/kernel/asm-offsets.c
++++ b/arch/m68k/kernel/asm-offsets.c
+@@ -9,6 +9,7 @@
+ * #defines from the assembly-language output.
+ */
+
++#define COMPILE_OFFSETS
+ #define ASM_OFFSETS_C
+
+ #include <linux/stddef.h>
+diff --git a/arch/microblaze/kernel/asm-offsets.c b/arch/microblaze/kernel/asm-offsets.c
+index 104c3ac5f30c8..b4b67d58e7f6a 100644
+--- a/arch/microblaze/kernel/asm-offsets.c
++++ b/arch/microblaze/kernel/asm-offsets.c
+@@ -7,6 +7,7 @@
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/init.h>
+ #include <linux/stddef.h>
+diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
+index 1e29efcba46e5..5debd9a3854a9 100644
+--- a/arch/mips/kernel/asm-offsets.c
++++ b/arch/mips/kernel/asm-offsets.c
+@@ -9,6 +9,8 @@
+ * Kevin Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
+ * Copyright (C) 2000 MIPS Technologies, Inc.
+ */
++#define COMPILE_OFFSETS
++
+ #include <linux/compat.h>
+ #include <linux/types.h>
+ #include <linux/sched.h>
+diff --git a/arch/nios2/kernel/asm-offsets.c b/arch/nios2/kernel/asm-offsets.c
+index e3d9b7b6fb48a..88190b503ce5d 100644
+--- a/arch/nios2/kernel/asm-offsets.c
++++ b/arch/nios2/kernel/asm-offsets.c
+@@ -2,6 +2,7 @@
+ /*
+ * Copyright (C) 2011 Tobias Klauser <tklauser@distanz.ch>
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/stddef.h>
+ #include <linux/sched.h>
+diff --git a/arch/openrisc/kernel/asm-offsets.c b/arch/openrisc/kernel/asm-offsets.c
+index 710651d5aaae1..3cc826f2216b1 100644
+--- a/arch/openrisc/kernel/asm-offsets.c
++++ b/arch/openrisc/kernel/asm-offsets.c
+@@ -18,6 +18,7 @@
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/signal.h>
+ #include <linux/sched.h>
+diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
+index 757816a7bd4b2..9abfe65492c65 100644
+--- a/arch/parisc/kernel/asm-offsets.c
++++ b/arch/parisc/kernel/asm-offsets.c
+@@ -13,6 +13,7 @@
+ * Copyright (C) 2002 Randolph Chung <tausq with parisc-linux.org>
+ * Copyright (C) 2003 James Bottomley <jejb at parisc-linux.org>
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/types.h>
+ #include <linux/sched.h>
+diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
+index b3048f6d3822c..a4bc80b30410a 100644
+--- a/arch/powerpc/kernel/asm-offsets.c
++++ b/arch/powerpc/kernel/asm-offsets.c
+@@ -8,6 +8,7 @@
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/compat.h>
+ #include <linux/signal.h>
+diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
+index 6e8c0d6feae9e..7d42d3b8a32a7 100644
+--- a/arch/riscv/kernel/asm-offsets.c
++++ b/arch/riscv/kernel/asm-offsets.c
+@@ -3,6 +3,7 @@
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/kbuild.h>
+ #include <linux/mm.h>
+diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
+index 95ecad9c7d7d2..a8915663e917f 100644
+--- a/arch/s390/kernel/asm-offsets.c
++++ b/arch/s390/kernel/asm-offsets.c
+@@ -4,6 +4,7 @@
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/kbuild.h>
+ #include <linux/sched.h>
+diff --git a/arch/sh/kernel/asm-offsets.c b/arch/sh/kernel/asm-offsets.c
+index a0322e8328456..429b6a7631468 100644
+--- a/arch/sh/kernel/asm-offsets.c
++++ b/arch/sh/kernel/asm-offsets.c
+@@ -8,6 +8,7 @@
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/stddef.h>
+ #include <linux/types.h>
+diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c
+index 3d9b9855dce91..6e660bde48dd8 100644
+--- a/arch/sparc/kernel/asm-offsets.c
++++ b/arch/sparc/kernel/asm-offsets.c
+@@ -10,6 +10,7 @@
+ *
+ * On sparc, thread_info data is static and TI_XXX offsets are computed by hand.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/sched.h>
+ #include <linux/mm_types.h>
+diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c
+index 1fb12235ab9c8..a69873aa697f4 100644
+--- a/arch/um/kernel/asm-offsets.c
++++ b/arch/um/kernel/asm-offsets.c
+@@ -1 +1,3 @@
++#define COMPILE_OFFSETS
++
+ #include <sysdep/kernel-offsets.h>
+diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
+index da38de20ae598..cfbced95e944a 100644
+--- a/arch/xtensa/kernel/asm-offsets.c
++++ b/arch/xtensa/kernel/asm-offsets.c
+@@ -11,6 +11,7 @@
+ *
+ * Chris Zankel <chris@zankel.net>
+ */
++#define COMPILE_OFFSETS
+
+ #include <asm/processor.h>
+ #include <asm/coprocessor.h>
+--
+2.51.0
+
--- /dev/null
+From a6297c943b52f9458b8ca489dfbe9bfdd26dce75 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Aug 2025 17:04:07 -0400
+Subject: audit: record fanotify event regardless of presence of rules
+
+From: Richard Guy Briggs <rgb@redhat.com>
+
+[ Upstream commit ce8370e2e62a903e18be7dd0e0be2eee079501e1 ]
+
+When no audit rules are in place, fanotify event results are
+unconditionally dropped due to an explicit check for the existence of
+any audit rules. Given this is a report from another security
+sub-system, allow it to be recorded regardless of the existence of any
+audit rules.
+
+To test, install and run the fapolicyd daemon with default config. Then
+as an unprivileged user, create and run a very simple binary that should
+be denied. Then check for an event with
+ ausearch -m FANOTIFY -ts recent
+
+Link: https://issues.redhat.com/browse/RHEL-9065
+Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/audit.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/linux/audit.h b/include/linux/audit.h
+index a394614ccd0b8..e3f06eba9c6e6 100644
+--- a/include/linux/audit.h
++++ b/include/linux/audit.h
+@@ -527,7 +527,7 @@ static inline void audit_log_kern_module(const char *name)
+
+ static inline void audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar)
+ {
+- if (!audit_dummy_context())
++ if (audit_enabled)
+ __audit_fanotify(response, friar);
+ }
+
+--
+2.51.0
+
--- /dev/null
+From 496727f1f5bd4315290c755db82f0460635f17b2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Sep 2025 17:43:04 +0100
+Subject: btrfs: abort transaction if we fail to update inode in log replay dir
+ fixup
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 5a0565cad3ef7cbf4cf43d1dd1e849b156205292 ]
+
+If we fail to update the inode at link_to_fixup_dir(), we don't abort the
+transaction and just propagate the error up the call chain, which makes it
+hard to pinpoint the error to the inode update. So abort the transaction if
+the inode update call fails, so that if it happens we know about it
+immediately.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-log.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 4f92aa15d9b1d..165d2ee500ca3 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -1796,6 +1796,8 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
+ else
+ inc_nlink(vfs_inode);
+ ret = btrfs_update_inode(trans, inode);
++ if (ret)
++ btrfs_abort_transaction(trans, ret);
+ } else if (ret == -EEXIST) {
+ ret = 0;
+ }
+--
+2.51.0
+
--- /dev/null
+From b2c31af40dd6f88a468a8613c542de8306f31b47 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jul 2025 15:49:31 +0100
+Subject: btrfs: abort transaction in the process_one_buffer() log tree walk
+ callback
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit e6dd405b6671b9753b98d8bdf76f8f0ed36c11cd ]
+
+In the process_one_buffer() log tree walk callback we return errors to the
+log tree walk caller and then the caller aborts the transaction, if we
+have one, or turns the fs into error state if we don't have one. While
+this reduces code it makes it harder to figure out where exactly an error
+came from. So add the transaction aborts after every failure inside the
+process_one_buffer() callback, so that it helps figuring out why failures
+happen.
+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-log.c | 20 ++++++++++++++++----
+ 1 file changed, 16 insertions(+), 4 deletions(-)
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 6d92326a1a0c7..50ed84cb68a69 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -347,6 +347,7 @@ static int process_one_buffer(struct btrfs_root *log,
+ struct extent_buffer *eb,
+ struct walk_control *wc, u64 gen, int level)
+ {
++ struct btrfs_trans_handle *trans = wc->trans;
+ struct btrfs_fs_info *fs_info = log->fs_info;
+ int ret = 0;
+
+@@ -361,18 +362,29 @@ static int process_one_buffer(struct btrfs_root *log,
+ };
+
+ ret = btrfs_read_extent_buffer(eb, &check);
+- if (ret)
++ if (ret) {
++ if (trans)
++ btrfs_abort_transaction(trans, ret);
++ else
++ btrfs_handle_fs_error(fs_info, ret, NULL);
+ return ret;
++ }
+ }
+
+ if (wc->pin) {
+- ret = btrfs_pin_extent_for_log_replay(wc->trans, eb);
+- if (ret)
++ ASSERT(trans != NULL);
++ ret = btrfs_pin_extent_for_log_replay(trans, eb);
++ if (ret) {
++ btrfs_abort_transaction(trans, ret);
+ return ret;
++ }
+
+ if (btrfs_buffer_uptodate(eb, gen, 0) &&
+- btrfs_header_level(eb) == 0)
++ btrfs_header_level(eb) == 0) {
+ ret = btrfs_exclude_logged_extents(eb);
++ if (ret)
++ btrfs_abort_transaction(trans, ret);
++ }
+ }
+ return ret;
+ }
+--
+2.51.0
+
--- /dev/null
+From b70c9bc307275209743b38f5f7c7507bef7b311d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jul 2025 14:56:11 +0100
+Subject: btrfs: abort transaction on specific error places when walking log
+ tree
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 6ebd726b104fa99d47c0d45979e6a6109844ac18 ]
+
+We do several things while walking a log tree (for replaying and for
+freeing a log tree) like reading extent buffers and cleaning them up,
+but we don't immediately abort the transaction, or turn the fs into an
+error state, when one of these things fails. Instead we do the transaction
+abort or turn the fs into an error state in the caller of the entry point
+function that walks a log tree - walk_log_tree() - which means we don't
+get to know exactly where an error came from.
+
+Improve on this by doing a transaction abort / turn fs into error state
+after each such failure so that when it happens we have a better
+understanding of where the failure comes from. This deliberately leaves
+the transaction abort / turn fs into error state in the callers of
+walk_log_tree() so as to ensure we don't get into an inconsistent state in
+case we forget to do it deeper in the call chain. It also deliberately does
+not do it after errors from the calls to the callback defined in
+struct walk_control::process_func(), as we will do that in a later patch.
+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-log.c | 33 ++++++++++++++++++++++++++++-----
+ 1 file changed, 28 insertions(+), 5 deletions(-)
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 7a63afedd01e6..6d92326a1a0c7 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -2630,15 +2630,24 @@ static int unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start)
+ static int clean_log_buffer(struct btrfs_trans_handle *trans,
+ struct extent_buffer *eb)
+ {
++ int ret;
++
+ btrfs_tree_lock(eb);
+ btrfs_clear_buffer_dirty(trans, eb);
+ wait_on_extent_buffer_writeback(eb);
+ btrfs_tree_unlock(eb);
+
+- if (trans)
+- return btrfs_pin_reserved_extent(trans, eb);
++ if (trans) {
++ ret = btrfs_pin_reserved_extent(trans, eb);
++ if (ret)
++ btrfs_abort_transaction(trans, ret);
++ return ret;
++ }
+
+- return unaccount_log_buffer(eb->fs_info, eb->start);
++ ret = unaccount_log_buffer(eb->fs_info, eb->start);
++ if (ret)
++ btrfs_handle_fs_error(eb->fs_info, ret, NULL);
++ return ret;
+ }
+
+ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
+@@ -2674,8 +2683,14 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
+ next = btrfs_find_create_tree_block(fs_info, bytenr,
+ btrfs_header_owner(cur),
+ *level - 1);
+- if (IS_ERR(next))
+- return PTR_ERR(next);
++ if (IS_ERR(next)) {
++ ret = PTR_ERR(next);
++ if (trans)
++ btrfs_abort_transaction(trans, ret);
++ else
++ btrfs_handle_fs_error(fs_info, ret, NULL);
++ return ret;
++ }
+
+ if (*level == 1) {
+ ret = wc->process_func(root, next, wc, ptr_gen,
+@@ -2690,6 +2705,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
+ ret = btrfs_read_extent_buffer(next, &check);
+ if (ret) {
+ free_extent_buffer(next);
++ if (trans)
++ btrfs_abort_transaction(trans, ret);
++ else
++ btrfs_handle_fs_error(fs_info, ret, NULL);
+ return ret;
+ }
+
+@@ -2705,6 +2724,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
+ ret = btrfs_read_extent_buffer(next, &check);
+ if (ret) {
+ free_extent_buffer(next);
++ if (trans)
++ btrfs_abort_transaction(trans, ret);
++ else
++ btrfs_handle_fs_error(fs_info, ret, NULL);
+ return ret;
+ }
+
+--
+2.51.0
+
--- /dev/null
+From 043347135f8442ac0b1bf5b6bfc12f2c35374b9c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Aug 2025 12:10:28 +0100
+Subject: btrfs: always drop log root tree reference in btrfs_replay_log()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 2f5b8095ea47b142c56c09755a8b1e14145a2d30 ]
+
+Currently we have this odd behaviour:
+
+1) At btrfs_replay_log() we drop the reference of the log root tree if
+ the call to btrfs_recover_log_trees() failed;
+
+2) But if the call to btrfs_recover_log_trees() did not fail, we don't
+ drop the reference in btrfs_replay_log() - we expect that
+ btrfs_recover_log_trees() does it in case it returns success.
+
+Let's simplify this and make btrfs_replay_log() always drop the reference
+on the log root tree. Not only does this simplify the code, it's also what
+makes sense, since it's btrfs_replay_log() that grabbed the reference in
+the first place.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/disk-io.c | 2 +-
+ fs/btrfs/tree-log.c | 1 -
+ 2 files changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 70fc4e7cc5a0e..0b02e36b30558 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -2087,10 +2087,10 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
+
+ /* returns with log_tree_root freed on success */
+ ret = btrfs_recover_log_trees(log_tree_root);
++ btrfs_put_root(log_tree_root);
+ if (ret) {
+ btrfs_handle_fs_error(fs_info, ret,
+ "Failed to recover log tree");
+- btrfs_put_root(log_tree_root);
+ return ret;
+ }
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 50ed84cb68a69..518cd74191e77 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -7469,7 +7469,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
+
+ log_root_tree->log_root = NULL;
+ clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
+- btrfs_put_root(log_root_tree);
+
+ return 0;
+ error:
+--
+2.51.0
+
--- /dev/null
+From 6961fd2310f25663e1cc6a8e7977438fa016289f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Sep 2025 17:01:44 +0200
+Subject: btrfs: scrub: replace max_t()/min_t() with clamp() in
+ scrub_throttle_dev_io()
+
+From: Thorsten Blum <thorsten.blum@linux.dev>
+
+[ Upstream commit a7f3dfb8293c4cee99743132d69863a92e8f4875 ]
+
+Replace max_t() followed by min_t() with a single clamp().
+
+As was pointed by David Laight in
+https://lore.kernel.org/linux-btrfs/20250906122458.75dfc8f0@pumpkin/
+the calculation may overflow u32 when the input value is too large, so
+clamp_t() is not used. In practice the expected values are in the range of
+megabytes to gigabytes (throughput limit), so the bug would not happen.
+
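+A minimal sketch (not the kernel code) of the truncation concern, assuming
+bwlimit is the u64 bytes-per-second limit used above:
+
+    u64 bwlimit = 1ULL << 60;                 /* absurdly large limit */
+    u64 q = bwlimit / (16 * 1024 * 1024);     /* 1ULL << 36 */
+    u32 t = (u32)q;                           /* truncated to 0 */
+    /*
+     * max_t(u32, 1, t) followed by min_t(u32, 64, ...) yields 1, and
+     * clamp_t(u32, ...) would behave the same way, while clamp() on the
+     * untruncated u64 value yields the intended 64.
+     */
+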
+Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ Use clamp() and add explanation. ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/scrub.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
+index 6776e6ab8d108..fd4c1ca34b5e4 100644
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -1369,8 +1369,7 @@ static void scrub_throttle_dev_io(struct scrub_ctx *sctx, struct btrfs_device *d
+ * Slice is divided into intervals when the IO is submitted, adjust by
+ * bwlimit and maximum of 64 intervals.
+ */
+- div = max_t(u32, 1, (u32)(bwlimit / (16 * 1024 * 1024)));
+- div = min_t(u32, 64, div);
++ div = clamp(bwlimit / (16 * 1024 * 1024), 1, 64);
+
+ /* Start new epoch, set deadline */
+ now = ktime_get();
+--
+2.51.0
+
--- /dev/null
+From 5613ea5ed3366b504037789c8bb8cebb30a3524f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Sep 2025 08:34:05 +0930
+Subject: btrfs: tree-checker: add inode extref checks
+
+From: Qu Wenruo <wqu@suse.com>
+
+[ Upstream commit aab9458b9f0019e97fae394c2d6d9d1a03addfb3 ]
+
+Like inode refs, inode extrefs have a variable length name, which means
+we have to do a proper check to make sure neither the header nor the name
+can exceed the item limits.
+
+The check itself is very similar to check_inode_ref(), just a different
+structure (btrfs_inode_extref vs btrfs_inode_ref).
+
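+For reference, a simplified sketch of the on-disk structure being validated
+(matching the layout in the btrfs on-disk format headers):
+
+    struct btrfs_inode_extref {
+            __le64 parent_objectid;
+            __le64 index;
+            __le16 name_len;
+            __u8   name[];     /* name_len bytes follow the header */
+    } __attribute__((packed));
+
+Several such entries can be packed back to back in a single item, so the
+checker walks them until the running pointer reaches the end of the item.
+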
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-checker.c | 37 +++++++++++++++++++++++++++++++++++++
+ 1 file changed, 37 insertions(+)
+
+diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
+index a997c7cc35a26..a83e455f813bf 100644
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -183,6 +183,7 @@ static bool check_prev_ino(struct extent_buffer *leaf,
+ /* Only these key->types needs to be checked */
+ ASSERT(key->type == BTRFS_XATTR_ITEM_KEY ||
+ key->type == BTRFS_INODE_REF_KEY ||
++ key->type == BTRFS_INODE_EXTREF_KEY ||
+ key->type == BTRFS_DIR_INDEX_KEY ||
+ key->type == BTRFS_DIR_ITEM_KEY ||
+ key->type == BTRFS_EXTENT_DATA_KEY);
+@@ -1782,6 +1783,39 @@ static int check_inode_ref(struct extent_buffer *leaf,
+ return 0;
+ }
+
++static int check_inode_extref(struct extent_buffer *leaf,
++ struct btrfs_key *key, struct btrfs_key *prev_key,
++ int slot)
++{
++ unsigned long ptr = btrfs_item_ptr_offset(leaf, slot);
++ unsigned long end = ptr + btrfs_item_size(leaf, slot);
++
++ if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
++ return -EUCLEAN;
++
++ while (ptr < end) {
++ struct btrfs_inode_extref *extref = (struct btrfs_inode_extref *)ptr;
++ u16 namelen;
++
++		if (unlikely(ptr + sizeof(*extref) > end)) {
++ inode_ref_err(leaf, slot,
++ "inode extref overflow, ptr %lu end %lu inode_extref size %zu",
++ ptr, end, sizeof(*extref));
++ return -EUCLEAN;
++ }
++
++ namelen = btrfs_inode_extref_name_len(leaf, extref);
++ if (unlikely(ptr + sizeof(*extref) + namelen > end)) {
++ inode_ref_err(leaf, slot,
++ "inode extref overflow, ptr %lu end %lu namelen %u",
++ ptr, end, namelen);
++ return -EUCLEAN;
++ }
++ ptr += sizeof(*extref) + namelen;
++ }
++ return 0;
++}
++
+ static int check_raid_stripe_extent(const struct extent_buffer *leaf,
+ const struct btrfs_key *key, int slot)
+ {
+@@ -1893,6 +1927,9 @@ static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf,
+ case BTRFS_INODE_REF_KEY:
+ ret = check_inode_ref(leaf, key, prev_key, slot);
+ break;
++ case BTRFS_INODE_EXTREF_KEY:
++ ret = check_inode_extref(leaf, key, prev_key, slot);
++ break;
+ case BTRFS_BLOCK_GROUP_ITEM_KEY:
+ ret = check_block_group_item(leaf, key, slot);
+ break;
+--
+2.51.0
+
--- /dev/null
+From 17181f1cd33cfcd7024c3d0606e424d27ff2a1fe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Aug 2025 17:46:18 +0100
+Subject: btrfs: use level argument in log tree walk callback
+ replay_one_buffer()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 6cb7f0b8c9b0d6a35682335fea88bd26f089306f ]
+
+We already have the extent buffer's level in an argument, there's no need
+to first ensure the extent buffer's data is loaded (by calling
+btrfs_read_extent_buffer()) and then call btrfs_header_level() to check
+the level. So use the level argument and do the check before calling
+btrfs_read_extent_buffer().
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-log.c | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 518cd74191e77..4f92aa15d9b1d 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -2461,15 +2461,13 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
+ int i;
+ int ret;
+
++ if (level != 0)
++ return 0;
++
+ ret = btrfs_read_extent_buffer(eb, &check);
+ if (ret)
+ return ret;
+
+- level = btrfs_header_level(eb);
+-
+- if (level != 0)
+- return 0;
+-
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+--
+2.51.0
+
--- /dev/null
+From 2315af132a33b20e24b3a740bfc56993b3f29be5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Sep 2025 12:09:14 +0100
+Subject: btrfs: use smp_mb__after_atomic() when forcing COW in
+ create_pending_snapshot()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 45c222468d33202c07c41c113301a4b9c8451b8f ]
+
+After setting the BTRFS_ROOT_FORCE_COW flag on the root we are doing a
+full write barrier, smp_wmb(), but we don't need to, all we need is a
+smp_mb__after_atomic(). The use of the smp_wmb() is from the old days
+when we didn't use a bit and used instead an int field in the root to
+signal if cow is forced. After the int field was changed to a bit in
+the root's state (flags field), we forgot to update the memory barrier
+in create_pending_snapshot() to smp_mb__after_atomic(), but we did the
+change in commit_fs_roots() after clearing BTRFS_ROOT_FORCE_COW. That
+happened in commit 27cdeb7096b8 ("Btrfs: use bitfield instead of integer
+data type for the some variants in btrfs_root"). On the reader side, in
+should_cow_block(), we also use the counterpart smp_mb__before_atomic()
+which generates further confusion.
+
+So change the smp_wmb() to smp_mb__after_atomic(). In fact we don't
+even need any barrier at all since create_pending_snapshot() is called
+in the critical section of a transaction commit and therefore no one
+can concurrently join/attach the transaction, or start a new one, until
+the transaction is unblocked. By the time someone starts a new transaction
+and enters should_cow_block(), a lot of implicit memory barriers already
+took place by having acquired several locks such as fs_info->trans_lock
+and extent buffer locks on the root node at least. Nevertheless, for
+consistency use smp_mb__after_atomic() after setting the force cow bit
+in create_pending_snapshot().
+
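+A condensed sketch of the pairing described above (simplified from the
+actual functions):
+
+    /* writer, create_pending_snapshot(): */
+    set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
+    smp_mb__after_atomic();
+
+    /* reader, should_cow_block(): */
+    smp_mb__before_atomic();
+    if (test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
+            /* COW this block */
+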
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/transaction.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index c5c0d9cf1a808..a4e486a600bed 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -1806,7 +1806,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+ }
+ /* see comments in should_cow_block() */
+ set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
+- smp_wmb();
++ smp_mb__after_atomic();
+
+ btrfs_set_root_node(new_root_item, tmp);
+ /* record when the snapshot was created in key.offset */
+--
+2.51.0
+
--- /dev/null
+From e0264b290f1d5792d4664a0fe27c898716f36a81 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jul 2025 11:13:15 +0900
+Subject: btrfs: zoned: refine extent allocator hint selection
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 0d703963d297964451783e1a0688ebdf74cd6151 ]
+
+The hint block group selection in the extent allocator is wrong in the
+first place, as it can select the dedicated data relocation block group for
+the normal data allocation.
+
+Since we separated the normal data space_info and the data relocation
+space_info, we can easily identify whether a block group is for data
+relocation or not. Do not choose it for the normal data allocation.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent-tree.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index 97d517cdf2df7..682d21a73a67a 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -4297,7 +4297,8 @@ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
+ }
+
+ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
+- struct find_free_extent_ctl *ffe_ctl)
++ struct find_free_extent_ctl *ffe_ctl,
++ struct btrfs_space_info *space_info)
+ {
+ if (ffe_ctl->for_treelog) {
+ spin_lock(&fs_info->treelog_bg_lock);
+@@ -4321,6 +4322,7 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
+ u64 avail = block_group->zone_capacity - block_group->alloc_offset;
+
+ if (block_group_bits(block_group, ffe_ctl->flags) &&
++ block_group->space_info == space_info &&
+ avail >= ffe_ctl->num_bytes) {
+ ffe_ctl->hint_byte = block_group->start;
+ break;
+@@ -4342,7 +4344,7 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
+ return prepare_allocation_clustered(fs_info, ffe_ctl,
+ space_info, ins);
+ case BTRFS_EXTENT_ALLOC_ZONED:
+- return prepare_allocation_zoned(fs_info, ffe_ctl);
++ return prepare_allocation_zoned(fs_info, ffe_ctl, space_info);
+ default:
+ BUG();
+ }
+--
+2.51.0
+
--- /dev/null
+From 4230345a12b197f63729c55c765d44f98c2ca78d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Jul 2025 13:39:11 +0200
+Subject: btrfs: zoned: return error from btrfs_zone_finish_endio()
+
+From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+
+[ Upstream commit 3c44cd3c79fcb38a86836dea6ff8fec322a9e68c ]
+
+Now that btrfs_zone_finish_endio_workfn() is directly calling
+do_zone_finish() the only caller of btrfs_zone_finish_endio() is
+btrfs_finish_one_ordered().
+
+btrfs_finish_one_ordered() already has error handling in-place so
+btrfs_zone_finish_endio() can return an error if the block group lookup
+fails.
+
+Also as btrfs_zone_finish_endio() already checks for zoned filesystems and
+returns early, there's no need to do this in the caller.
+
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/inode.c | 7 ++++---
+ fs/btrfs/zoned.c | 8 +++++---
+ fs/btrfs/zoned.h | 9 ++++++---
+ 3 files changed, 15 insertions(+), 9 deletions(-)
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 4031cbdea0740..41da405181b4f 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -3107,9 +3107,10 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
+ goto out;
+ }
+
+- if (btrfs_is_zoned(fs_info))
+- btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
+- ordered_extent->disk_num_bytes);
++ ret = btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
++ ordered_extent->disk_num_bytes);
++ if (ret)
++ goto out;
+
+ if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
+ truncated = true;
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 87c5dd3ad016e..fcdf7b058a584 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -2464,16 +2464,17 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
+ return ret;
+ }
+
+-void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
++int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
+ {
+ struct btrfs_block_group *block_group;
+ u64 min_alloc_bytes;
+
+ if (!btrfs_is_zoned(fs_info))
+- return;
++ return 0;
+
+ block_group = btrfs_lookup_block_group(fs_info, logical);
+- ASSERT(block_group);
++ if (WARN_ON_ONCE(!block_group))
++ return -ENOENT;
+
+ /* No MIXED_BG on zoned btrfs. */
+ if (block_group->flags & BTRFS_BLOCK_GROUP_DATA)
+@@ -2490,6 +2491,7 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
+
+ out:
+ btrfs_put_block_group(block_group);
++ return 0;
+ }
+
+ static void btrfs_zone_finish_endio_workfn(struct work_struct *work)
+diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
+index 6e11533b8e14c..17c5656580dd9 100644
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -83,7 +83,7 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
+ bool btrfs_zone_activate(struct btrfs_block_group *block_group);
+ int btrfs_zone_finish(struct btrfs_block_group *block_group);
+ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags);
+-void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
++int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
+ u64 length);
+ void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+ struct extent_buffer *eb);
+@@ -234,8 +234,11 @@ static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
+ return true;
+ }
+
+-static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
+- u64 logical, u64 length) { }
++static inline int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
++ u64 logical, u64 length)
++{
++ return 0;
++}
+
+ static inline void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+ struct extent_buffer *eb) { }
+--
+2.51.0
+
--- /dev/null
+From 1a27b5e454cf61ce28e3b82e30c4ca2f682381f8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 Sep 2025 01:12:27 +0000
+Subject: cpuset: Use new excpus for nocpu error check when enabling root
+ partition
+
+From: Chen Ridong <chenridong@huawei.com>
+
+[ Upstream commit 59d5de3655698679ad8fd2cc82228de4679c4263 ]
+
+A previous patch fixed a bug where new_prs should be assigned before
+checking housekeeping conflicts. This patch addresses another potential
+issue: the nocpu error check currently uses the xcpus which is not updated.
+Although no issue has been observed so far, the check should be performed
+using the new effective exclusive cpus.
+
+The comment has been removed because the function returns an error if
+nocpu checking fails, which is unrelated to the parent.
+
+Signed-off-by: Chen Ridong <chenridong@huawei.com>
+Reviewed-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cpuset.c | 6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index fef93032fe7e4..fd890b34a8403 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -1728,11 +1728,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
+ if (prstate_housekeeping_conflict(new_prs, xcpus))
+ return PERR_HKEEPING;
+
+- /*
+- * A parent can be left with no CPU as long as there is no
+- * task directly associated with the parent partition.
+- */
+- if (nocpu)
++ if (tasks_nocpu_error(parent, cs, xcpus))
+ return PERR_NOCPUS;
+
+ /*
+--
+2.51.0
+
--- /dev/null
+From d652aa2ed5235fd64ae767808908b000818e4502 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Aug 2025 03:19:54 +0800
+Subject: EDAC: Fix wrong executable file modes for C source files
+
+From: Kuan-Wei Chiu <visitorckw@gmail.com>
+
+[ Upstream commit 71965cae7db394ff5ba3b2d2befe4e136ceec268 ]
+
+Three EDAC source files were mistakenly marked as executable when adding the
+EDAC scrub controls.
+
+These are plain C source files and should not carry the executable bit.
+Correcting their modes follows the principle of least privilege and avoids
+unnecessary execute permissions in the repository.
+
+ [ bp: Massage commit message. ]
+
+Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/20250828191954.903125-1-visitorckw@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/edac/ecs.c | 0
+ drivers/edac/mem_repair.c | 0
+ drivers/edac/scrub.c | 0
+ 3 files changed, 0 insertions(+), 0 deletions(-)
+ mode change 100755 => 100644 drivers/edac/ecs.c
+ mode change 100755 => 100644 drivers/edac/mem_repair.c
+ mode change 100755 => 100644 drivers/edac/scrub.c
+
+diff --git a/drivers/edac/ecs.c b/drivers/edac/ecs.c
+old mode 100755
+new mode 100644
+diff --git a/drivers/edac/mem_repair.c b/drivers/edac/mem_repair.c
+old mode 100755
+new mode 100644
+diff --git a/drivers/edac/scrub.c b/drivers/edac/scrub.c
+old mode 100755
+new mode 100644
+--
+2.51.0
+
--- /dev/null
+From 90175ae118a68e96eeb97a03511afe3d8cccbee3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Aug 2025 09:17:39 -0700
+Subject: EDAC/ie31200: Add two more Intel Alder Lake-S SoCs for EDAC support
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Kyle Manna <kyle@kylemanna.com>
+
+[ Upstream commit 71b69f817e91b588030d7d47ddbdc4857a92eb4e ]
+
+Host Device IDs (DID0) correspond to:
+* Intel Core i7-12700K
+* Intel Core i5-12600K
+
+See documentation:
+* 12th Generation Intel® Core™ Processors Datasheet
+ * Volume 1 of 2, Doc. No.: 655258, Rev.: 011
+ * https://edc.intel.com/output/DownloadPdfDocument?id=8297 (PDF)
+
+Signed-off-by: Kyle Manna <kyle@kylemanna.com>
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
+Link: https://lore.kernel.org/r/20250819161739.3241152-1-kyle@kylemanna.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/edac/ie31200_edac.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c
+index 5c1fa1c0d12e3..5a080ab65476d 100644
+--- a/drivers/edac/ie31200_edac.c
++++ b/drivers/edac/ie31200_edac.c
+@@ -99,6 +99,8 @@
+
+ /* Alder Lake-S */
+ #define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1 0x4660
++#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_2 0x4668 /* 8P+4E, e.g. i7-12700K */
++#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_3 0x4648 /* 6P+4E, e.g. i5-12600K */
+
+ /* Bartlett Lake-S */
+ #define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_1 0x4639
+@@ -761,6 +763,8 @@ static const struct pci_device_id ie31200_pci_tbl[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_6), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_HX_1), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1), (kernel_ulong_t)&rpl_s_cfg},
++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_2), (kernel_ulong_t)&rpl_s_cfg},
++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_3), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_1), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_2), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_3), (kernel_ulong_t)&rpl_s_cfg},
+--
+2.51.0
+
--- /dev/null
+From f47a7852e3030b5b0c360943fd302ba833f9999f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Sep 2025 20:30:17 +0000
+Subject: EDAC/mc_sysfs: Increase legacy channel support to 16
+
+From: Avadhut Naik <avadhut.naik@amd.com>
+
+[ Upstream commit 6e1c2c6c2c40ce99e0d2633b212f43c702c1a002 ]
+
+Newer AMD systems can support up to 16 channels per EDAC "mc" device.
+These are detected by the EDAC module running on the device, and the
+current EDAC interface is appropriately enumerated.
+
+The legacy EDAC sysfs interface however, provides device attributes for
+channels 0 through 11 only. Consequently, the last four channels, 12
+through 15, will not be enumerated and will not be visible through the
+legacy sysfs interface.
+
+Add additional device attributes to ensure that all 16 channels, if
+present, are enumerated by and visible through the legacy EDAC sysfs
+interface.
+
+Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/20250916203242.1281036-1-avadhut.naik@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/edac/edac_mc_sysfs.c | 24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
+index 0f338adf7d937..8689631f19053 100644
+--- a/drivers/edac/edac_mc_sysfs.c
++++ b/drivers/edac/edac_mc_sysfs.c
+@@ -305,6 +305,14 @@ DEVICE_CHANNEL(ch10_dimm_label, S_IRUGO | S_IWUSR,
+ channel_dimm_label_show, channel_dimm_label_store, 10);
+ DEVICE_CHANNEL(ch11_dimm_label, S_IRUGO | S_IWUSR,
+ channel_dimm_label_show, channel_dimm_label_store, 11);
++DEVICE_CHANNEL(ch12_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 12);
++DEVICE_CHANNEL(ch13_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 13);
++DEVICE_CHANNEL(ch14_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 14);
++DEVICE_CHANNEL(ch15_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 15);
+
+ /* Total possible dynamic DIMM Label attribute file table */
+ static struct attribute *dynamic_csrow_dimm_attr[] = {
+@@ -320,6 +328,10 @@ static struct attribute *dynamic_csrow_dimm_attr[] = {
+ &dev_attr_legacy_ch9_dimm_label.attr.attr,
+ &dev_attr_legacy_ch10_dimm_label.attr.attr,
+ &dev_attr_legacy_ch11_dimm_label.attr.attr,
++ &dev_attr_legacy_ch12_dimm_label.attr.attr,
++ &dev_attr_legacy_ch13_dimm_label.attr.attr,
++ &dev_attr_legacy_ch14_dimm_label.attr.attr,
++ &dev_attr_legacy_ch15_dimm_label.attr.attr,
+ NULL
+ };
+
+@@ -348,6 +360,14 @@ DEVICE_CHANNEL(ch10_ce_count, S_IRUGO,
+ channel_ce_count_show, NULL, 10);
+ DEVICE_CHANNEL(ch11_ce_count, S_IRUGO,
+ channel_ce_count_show, NULL, 11);
++DEVICE_CHANNEL(ch12_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 12);
++DEVICE_CHANNEL(ch13_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 13);
++DEVICE_CHANNEL(ch14_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 14);
++DEVICE_CHANNEL(ch15_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 15);
+
+ /* Total possible dynamic ce_count attribute file table */
+ static struct attribute *dynamic_csrow_ce_count_attr[] = {
+@@ -363,6 +383,10 @@ static struct attribute *dynamic_csrow_ce_count_attr[] = {
+ &dev_attr_legacy_ch9_ce_count.attr.attr,
+ &dev_attr_legacy_ch10_ce_count.attr.attr,
+ &dev_attr_legacy_ch11_ce_count.attr.attr,
++ &dev_attr_legacy_ch12_ce_count.attr.attr,
++ &dev_attr_legacy_ch13_ce_count.attr.attr,
++ &dev_attr_legacy_ch14_ce_count.attr.attr,
++ &dev_attr_legacy_ch15_ce_count.attr.attr,
+ NULL
+ };
+
+--
+2.51.0
+
--- /dev/null
+From 7dc7cb31fbaf707d2d6237c28bc7a3e55e13048d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Oct 2025 16:48:59 +0100
+Subject: genirq/chip: Add buslock back in to irq_set_handler()
+
+From: Charles Keepax <ckeepax@opensource.cirrus.com>
+
+[ Upstream commit 5d7e45dd670e42df4836afeaa9baf9d41ca4b434 ]
+
+The locking was changed from a buslock to a plain lock, but the patch
+description states there was no functional change. Assuming this was
+accidental, so revert to using the buslock.
+
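+A rough sketch of the difference (simplified, not the actual macro bodies):
+
+    /* plain variant: only the irq descriptor lock is taken */
+    scoped_irqdesc_get_and_lock(irq, 0)
+            __irq_do_set_handler(scoped_irqdesc, handle, is_chained, name);
+
+    /*
+     * buslock variant: additionally brackets the section with
+     * chip_bus_lock()/chip_bus_sync_unlock(), which matters for irq chips
+     * sitting behind slow buses (e.g. I2C/SPI expanders).
+     */
+    scoped_irqdesc_get_and_buslock(irq, 0)
+            __irq_do_set_handler(scoped_irqdesc, handle, is_chained, name);
+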
+Fixes: 5cd05f3e2315 ("genirq/chip: Rework irq_set_handler() variants")
+Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://patch.msgid.link/20251023154901.1333755-2-ckeepax@opensource.cirrus.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/irq/chip.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
+index 3ffa0d80ddd19..d1917b28761a3 100644
+--- a/kernel/irq/chip.c
++++ b/kernel/irq/chip.c
+@@ -1030,7 +1030,7 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
+ void __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
+ const char *name)
+ {
+- scoped_irqdesc_get_and_lock(irq, 0)
++ scoped_irqdesc_get_and_buslock(irq, 0)
+ __irq_do_set_handler(scoped_irqdesc, handle, is_chained, name);
+ }
+ EXPORT_SYMBOL_GPL(__irq_set_handler);
+--
+2.51.0
+
--- /dev/null
+From 34c98b6e10f180a7abd2fbcca68ad9546c6625e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Oct 2025 16:49:00 +0100
+Subject: genirq/manage: Add buslock back in to __disable_irq_nosync()
+
+From: Charles Keepax <ckeepax@opensource.cirrus.com>
+
+[ Upstream commit 56363e25f79fe83e63039c5595b8cd9814173d37 ]
+
+The locking was changed from a buslock to a plain lock, but the patch
+description states there was no functional change. Assuming this was
+accidental, so revert to using the buslock.
+
+Fixes: 1b7444446724 ("genirq/manage: Rework __disable_irq_nosync()")
+Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://patch.msgid.link/20251023154901.1333755-3-ckeepax@opensource.cirrus.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/irq/manage.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
+index c94837382037e..7d68fb5dc2428 100644
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -659,7 +659,7 @@ void __disable_irq(struct irq_desc *desc)
+
+ static int __disable_irq_nosync(unsigned int irq)
+ {
+- scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) {
++ scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) {
+ __disable_irq(scoped_irqdesc);
+ return 0;
+ }
+--
+2.51.0
+
--- /dev/null
+From eefaa63d07aca4d44e91486f0a43039238559741 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Oct 2025 16:49:01 +0100
+Subject: genirq/manage: Add buslock back in to enable_irq()
+
+From: Charles Keepax <ckeepax@opensource.cirrus.com>
+
+[ Upstream commit ef3330b99c01bda53f2a189b58bed8f6b7397f28 ]
+
+The locking was changed from a buslock to a plain lock, but the patch
+description states there was no functional change. Assuming this was
+accidental, so revert to using the buslock.
+
+Fixes: bddd10c55407 ("genirq/manage: Rework enable_irq()")
+Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://patch.msgid.link/20251023154901.1333755-4-ckeepax@opensource.cirrus.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/irq/manage.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
+index 7d68fb5dc2428..400856abf6721 100644
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -789,7 +789,7 @@ void __enable_irq(struct irq_desc *desc)
+ */
+ void enable_irq(unsigned int irq)
+ {
+- scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) {
++ scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) {
+ struct irq_desc *desc = scoped_irqdesc;
+
+ if (WARN(!desc->irq_data.chip, "enable_irq before setup/request_irq: irq %u\n", irq))
+--
+2.51.0
+
--- /dev/null
+From 82f22876b00c320ed9c7d964eeffcd4e786655ad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Aug 2025 14:03:40 -0400
+Subject: perf: Have get_perf_callchain() return NULL if crosstask and user are
+ set
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+[ Upstream commit 153f9e74dec230f2e070e16fa061bc7adfd2c450 ]
+
+get_perf_callchain() doesn't support cross-task unwinding for user space
+stacks, so have it return NULL if both the crosstask and user arguments are
+set.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20250820180428.426423415@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/callchain.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
+index decff7266cfbd..2609998ca07f1 100644
+--- a/kernel/events/callchain.c
++++ b/kernel/events/callchain.c
+@@ -224,6 +224,10 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+ struct perf_callchain_entry_ctx ctx;
+ int rctx, start_entry_idx;
+
++ /* crosstask is not supported for user stacks */
++ if (crosstask && user && !kernel)
++ return NULL;
++
+ entry = get_callchain_entry(&rctx);
+ if (!entry)
+ return NULL;
+@@ -240,7 +244,7 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+ perf_callchain_kernel(&ctx, regs);
+ }
+
+- if (user) {
++ if (user && !crosstask) {
+ if (!user_mode(regs)) {
+ if (current->flags & (PF_KTHREAD | PF_USER_WORKER))
+ regs = NULL;
+@@ -249,9 +253,6 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+ }
+
+ if (regs) {
+- if (crosstask)
+- goto exit_put;
+-
+ if (add_mark)
+ perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
+
+@@ -261,7 +262,6 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+ }
+ }
+
+-exit_put:
+ put_callchain_entry(rctx);
+
+ return entry;
+--
+2.51.0
+
--- /dev/null
+From 1642fd4e2d5f5e1dc02825acb53fa5f054b913fb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Aug 2025 14:03:43 -0400
+Subject: perf: Skip user unwind if the task is a kernel thread
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+[ Upstream commit 16ed389227651330879e17bd83d43bd234006722 ]
+
+If the task is not a user thread, there's no user stack to unwind.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20250820180428.930791978@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/core.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index a3dc79ec6f879..c0e938d28758f 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -8192,7 +8192,8 @@ struct perf_callchain_entry *
+ perf_callchain(struct perf_event *event, struct pt_regs *regs)
+ {
+ bool kernel = !event->attr.exclude_callchain_kernel;
+- bool user = !event->attr.exclude_callchain_user;
++ bool user = !event->attr.exclude_callchain_user &&
++ !(current->flags & (PF_KTHREAD | PF_USER_WORKER));
+ /* Disallow cross-task user callchains. */
+ bool crosstask = event->ctx->task && event->ctx->task != current;
+ const u32 max_stack = event->attr.sample_max_stack;
+--
+2.51.0
+
--- /dev/null
+From 8a38e567c4058e466e5e1b0823fe7b9c902ff337 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Aug 2025 14:03:41 -0400
+Subject: perf: Use current->flags & PF_KTHREAD|PF_USER_WORKER instead of
+ current->mm == NULL
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+[ Upstream commit 90942f9fac05702065ff82ed0bade0d08168d4ea ]
+
+To determine if a task is a kernel thread or not, it is more reliable to
+use (current->flags & (PF_KTHREAD|PF_USER_WORKER)) than to rely on
+current->mm being NULL. That is because some kernel tasks (io_uring
+helpers) may have an mm field.
+
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20250820180428.592367294@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/callchain.c | 6 +++---
+ kernel/events/core.c | 4 ++--
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
+index 6c83ad674d010..decff7266cfbd 100644
+--- a/kernel/events/callchain.c
++++ b/kernel/events/callchain.c
+@@ -242,10 +242,10 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+
+ if (user) {
+ if (!user_mode(regs)) {
+- if (current->mm)
+- regs = task_pt_regs(current);
+- else
++ if (current->flags & (PF_KTHREAD | PF_USER_WORKER))
+ regs = NULL;
++ else
++ regs = task_pt_regs(current);
+ }
+
+ if (regs) {
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index 6e9427c4aaff7..a3dc79ec6f879 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -7440,7 +7440,7 @@ static void perf_sample_regs_user(struct perf_regs *regs_user,
+ if (user_mode(regs)) {
+ regs_user->abi = perf_reg_abi(current);
+ regs_user->regs = regs;
+- } else if (!(current->flags & PF_KTHREAD)) {
++ } else if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+ perf_get_regs_user(regs_user, regs);
+ } else {
+ regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
+@@ -8080,7 +8080,7 @@ static u64 perf_virt_to_phys(u64 virt)
+ * Try IRQ-safe get_user_page_fast_only first.
+ * If failed, leave phys_addr as 0.
+ */
+- if (current->mm != NULL) {
++ if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+ struct page *p;
+
+ pagefault_disable();
+--
+2.51.0
+
--- /dev/null
+From ca9f460d3f2e517fd9f873da6a0d8f17baef1972 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Aug 2025 10:30:31 +0800
+Subject: perf/x86/intel: Add ICL_FIXED_0_ADAPTIVE bit into
+ INTEL_FIXED_BITS_MASK
+
+From: Dapeng Mi <dapeng1.mi@linux.intel.com>
+
+[ Upstream commit 2676dbf9f4fb7f6739d1207c0f1deaf63124642a ]
+
+ICL_FIXED_0_ADAPTIVE is missing from INTEL_FIXED_BITS_MASK, so add it.
+
+With the help of this new INTEL_FIXED_BITS_MASK, intel_pmu_enable_fixed()
+can be optimized: the old fixed counter control bits can be unconditionally
+cleared with INTEL_FIXED_BITS_MASK and then the new control bits can be set
+based on the new configuration.
+
+Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
+Tested-by: Yi Lai <yi1.lai@intel.com>
+Link: https://lore.kernel.org/r/20250820023032.17128-7-dapeng1.mi@linux.intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/events/intel/core.c | 10 +++-------
+ arch/x86/include/asm/perf_event.h | 6 +++++-
+ arch/x86/kvm/pmu.h | 2 +-
+ 3 files changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
+index 15da60cf69f20..046d12281fd94 100644
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -2845,8 +2845,8 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
+ {
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+- u64 mask, bits = 0;
+ int idx = hwc->idx;
++ u64 bits = 0;
+
+ if (is_topdown_idx(idx)) {
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+@@ -2885,14 +2885,10 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
+
+ idx -= INTEL_PMC_IDX_FIXED;
+ bits = intel_fixed_bits_by_idx(idx, bits);
+- mask = intel_fixed_bits_by_idx(idx, INTEL_FIXED_BITS_MASK);
+-
+- if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) {
++ if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip)
+ bits |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE);
+- mask |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE);
+- }
+
+- cpuc->fixed_ctrl_val &= ~mask;
++ cpuc->fixed_ctrl_val &= ~intel_fixed_bits_by_idx(idx, INTEL_FIXED_BITS_MASK);
+ cpuc->fixed_ctrl_val |= bits;
+ }
+
+diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
+index 70d1d94aca7e6..ee943bd1595af 100644
+--- a/arch/x86/include/asm/perf_event.h
++++ b/arch/x86/include/asm/perf_event.h
+@@ -35,7 +35,6 @@
+ #define ARCH_PERFMON_EVENTSEL_EQ (1ULL << 36)
+ #define ARCH_PERFMON_EVENTSEL_UMASK2 (0xFFULL << 40)
+
+-#define INTEL_FIXED_BITS_MASK 0xFULL
+ #define INTEL_FIXED_BITS_STRIDE 4
+ #define INTEL_FIXED_0_KERNEL (1ULL << 0)
+ #define INTEL_FIXED_0_USER (1ULL << 1)
+@@ -48,6 +47,11 @@
+ #define ICL_EVENTSEL_ADAPTIVE (1ULL << 34)
+ #define ICL_FIXED_0_ADAPTIVE (1ULL << 32)
+
++#define INTEL_FIXED_BITS_MASK \
++ (INTEL_FIXED_0_KERNEL | INTEL_FIXED_0_USER | \
++ INTEL_FIXED_0_ANYTHREAD | INTEL_FIXED_0_ENABLE_PMI | \
++ ICL_FIXED_0_ADAPTIVE)
++
+ #define intel_fixed_bits_by_idx(_idx, _bits) \
+ ((_bits) << ((_idx) * INTEL_FIXED_BITS_STRIDE))
+
+diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
+index ad89d0bd60058..103604c4b33b5 100644
+--- a/arch/x86/kvm/pmu.h
++++ b/arch/x86/kvm/pmu.h
+@@ -13,7 +13,7 @@
+ #define MSR_IA32_MISC_ENABLE_PMU_RO_MASK (MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | \
+ MSR_IA32_MISC_ENABLE_BTS_UNAVAIL)
+
+-/* retrieve the 4 bits for EN and PMI out of IA32_FIXED_CTR_CTRL */
++/* retrieve a fixed counter bits out of IA32_FIXED_CTR_CTRL */
+ #define fixed_ctrl_field(ctrl_reg, idx) \
+ (((ctrl_reg) >> ((idx) * INTEL_FIXED_BITS_STRIDE)) & INTEL_FIXED_BITS_MASK)
+
+--
+2.51.0
+
--- /dev/null
+From cf5b7d7a98bfe5768f519b0840be34ee7ef9d389 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Sep 2025 17:50:42 +0800
+Subject: sched/fair: update_cfs_group() for throttled cfs_rqs
+
+From: Aaron Lu <ziqianlu@bytedance.com>
+
+[ Upstream commit fcd394866e3db344cbe0bb485d7e3f741ac07245 ]
+
+With the task-based throttle model, tasks in a throttled hierarchy are
+allowed to continue running while they are in kernel mode. For this
+reason, the PELT clock is not stopped for cfs_rqs in a throttled
+hierarchy while they still have tasks running or queued.
+
+Since the PELT clock is not stopped, it is an open question whether
+update_cfs_group() should keep doing its job for cfs_rqs that are in a
+throttled hierarchy but still have tasks running or queued.
+
+The upside is that continuing to run update_cfs_group() keeps these
+cfs_rq entities' weights up to date, which helps derive an accurate load
+for the CPU and ensures fairness when tasks of different cgroups are
+running on the same CPU. On the other hand, as Benjamin Segall pointed
+out: when unthrottle comes around, the most likely correct distribution
+is the one we had at the time of throttle.
+
+In reality, either way may not matter much if tasks in a throttled
+hierarchy don't run in kernel mode for too long. But in case that
+happens, keeping these cfs_rq entities' weights up to date seems a good
+thing to do.
+
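+As a rough, simplified illustration (not part of this patch) of why an
+up-to-date weight matters for fairness: a group entity's weight on a CPU
+roughly follows the group's current load on that CPU, so two cgroups
+competing on one CPU split CPU time according to those weights. The real
+calc_group_shares() works on load averages and clamps the result; the
+helper and numbers below are made up for illustration:
+
+  #include <stdio.h>
+
+  static unsigned long group_shares(unsigned long tg_shares,
+                                    unsigned long cpu_load,
+                                    unsigned long tg_load)
+  {
+          /* roughly: shares ~= tg_shares * cpu_load / tg_load */
+          if (!tg_load)
+                  return tg_shares;
+          return tg_shares * cpu_load / tg_load;
+  }
+
+  int main(void)
+  {
+          unsigned long wa = group_shares(1024, 300, 1200);  /* cgroup A */
+          unsigned long wb = group_shares(1024, 900, 1200);  /* cgroup B */
+
+          printf("A weight %lu, B weight %lu -> A gets ~%lu%% of the CPU\n",
+                 wa, wb, 100 * wa / (wa + wb));
+          return 0;
+  }
+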
+Signed-off-by: Aaron Lu <ziqianlu@bytedance.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 4770d25ae2406..3e0d999e5ee2c 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -3957,9 +3957,6 @@ static void update_cfs_group(struct sched_entity *se)
+ if (!gcfs_rq || !gcfs_rq->load.weight)
+ return;
+
+- if (throttled_hierarchy(gcfs_rq))
+- return;
+-
+ shares = calc_group_shares(gcfs_rq);
+ if (unlikely(se->load.weight != shares))
+ reweight_entity(cfs_rq_of(se), se, shares);
+--
+2.51.0
+
--- /dev/null
+From 89d634457fa0b1abe8647e67fdc54d9c13669cb9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Sep 2025 11:33:28 -1000
+Subject: sched_ext: Keep bypass on between enable failure and
+ scx_disable_workfn()
+
+From: Tejun Heo <tj@kernel.org>
+
+[ Upstream commit 4a1d9d73aabc8f97f48c4f84f936de3b265ffd6f ]
+
+scx_enable() turns on the bypass mode while enable is in progress. If
+enabling fails, it turns off the bypass mode and then triggers scx_error().
+scx_error() will trigger scx_disable_workfn() which will turn on the bypass
+mode again and unload the failed scheduler.
+
+This moves the system out of bypass mode between the enable error path and
+the disable path, which is unnecessary and can be brittle - e.g. the thread
+running scx_enable() may already be on the failed scheduler and can be
+switched out before it triggers scx_error(), leading to a stall. The watchdog
+would eventually kick in, so the situation isn't critical but is still
+suboptimal.
+
+There is nothing to be gained by turning off the bypass mode between
+scx_enable() failure and scx_disable_workfn(). Keep bypass on.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Acked-by: Andrea Righi <arighi@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/ext.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
+index f89894476e51f..14724dae0b795 100644
+--- a/kernel/sched/ext.c
++++ b/kernel/sched/ext.c
+@@ -4763,7 +4763,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
+ err_disable_unlock_all:
+ scx_cgroup_unlock();
+ percpu_up_write(&scx_fork_rwsem);
+- scx_bypass(false);
++ /* we'll soon enter disable path, keep bypass on */
+ err_disable:
+ mutex_unlock(&scx_enable_mutex);
+ /*
+--
+2.51.0
+
--- /dev/null
+From d32b2132f0dc2b791769a353ae1bfafc2a0df0e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Sep 2025 09:03:26 -1000
+Subject: sched_ext: Make qmap dump operation non-destructive
+
+From: Tejun Heo <tj@kernel.org>
+
+[ Upstream commit d452972858e5cfa4262320ab74fe8f016460b96f ]
+
+The qmap dump operation was destructively consuming queue entries while
+displaying them. As dump can be triggered anytime, this can easily lead to
+stalls. Add a temporary dump_store queue and modify the dump logic to pop
+entries, display them, and then restore them to the original queue.
+This allows dump operations to be performed without affecting the
+scheduler's queue state.
+
+Note that if racing against new enqueues during dump, ordering can get
+mixed up, but this is acceptable for debugging purposes.
+
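+A tiny standalone model (not the BPF code itself) of the pop-store-restore
+pattern described above, using plain arrays in place of the BPF queue maps:
+
+  #include <stdio.h>
+  #include <stdbool.h>
+
+  #define QCAP 16
+
+  struct fifo {
+          int buf[QCAP];
+          int head, tail;         /* pop from head, push at tail */
+  };
+
+  static bool push(struct fifo *q, int v)
+  {
+          if (q->tail - q->head >= QCAP)
+                  return false;
+          q->buf[q->tail++ % QCAP] = v;
+          return true;
+  }
+
+  static bool pop(struct fifo *q, int *v)
+  {
+          if (q->head == q->tail)
+                  return false;
+          *v = q->buf[q->head++ % QCAP];
+          return true;
+  }
+
+  int main(void)
+  {
+          struct fifo queue = {0}, dump_store = {0};
+          int pid;
+
+          for (pid = 100; pid < 105; pid++)
+                  push(&queue, pid);
+
+          /* dump: pop each entry, print it, park it in dump_store */
+          printf("QMAP FIFO:");
+          while (pop(&queue, &pid)) {
+                  printf(" %d", pid);
+                  push(&dump_store, pid);
+          }
+          printf("\n");
+
+          /* restore: push everything back to the original queue */
+          while (pop(&dump_store, &pid))
+                  push(&queue, pid);
+
+          /* the queue still holds its five entries after the dump */
+          printf("entries left after dump: %d\n", queue.tail - queue.head);
+          return 0;
+  }
+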
+Acked-by: Andrea Righi <arighi@nvidia.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/sched_ext/scx_qmap.bpf.c | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
+index 69d877501cb72..cd50a94326e3a 100644
+--- a/tools/sched_ext/scx_qmap.bpf.c
++++ b/tools/sched_ext/scx_qmap.bpf.c
+@@ -56,7 +56,8 @@ struct qmap {
+ queue1 SEC(".maps"),
+ queue2 SEC(".maps"),
+ queue3 SEC(".maps"),
+- queue4 SEC(".maps");
++ queue4 SEC(".maps"),
++ dump_store SEC(".maps");
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+@@ -578,11 +579,26 @@ void BPF_STRUCT_OPS(qmap_dump, struct scx_dump_ctx *dctx)
+ return;
+
+ scx_bpf_dump("QMAP FIFO[%d]:", i);
++
++ /*
++ * Dump can be invoked anytime and there is no way to iterate in
++ * a non-destructive way. Pop and store in dump_store and then
++ * restore afterwards. If racing against new enqueues, ordering
++ * can get mixed up.
++ */
+ bpf_repeat(4096) {
+ if (bpf_map_pop_elem(fifo, &pid))
+ break;
++ bpf_map_push_elem(&dump_store, &pid, 0);
+ scx_bpf_dump(" %d", pid);
+ }
++
++ bpf_repeat(4096) {
++ if (bpf_map_pop_elem(&dump_store, &pid))
++ break;
++ bpf_map_push_elem(fifo, &pid, 0);
++ }
++
+ scx_bpf_dump("\n");
+ }
+ }
+--
+2.51.0
+
--- /dev/null
+From 63f4bd85d580e08409e9128b0715e253a2e0697f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Sep 2025 11:33:28 -1000
+Subject: sched_ext: Move internal type and accessor definitions to
+ ext_internal.h
+
+From: Tejun Heo <tj@kernel.org>
+
+[ Upstream commit 0c2b8356e430229efef42b03bd765a2a7ecf73fd ]
+
+There currently isn't a place to put SCX-internal types and accessors to
+be shared between ext.c and ext_idle.c. Create kernel/sched/ext_internal.h
+and move internal type and accessor definitions there. This trims ext.c a
+bit and makes future additions easier. Pure code reorganization. No
+functional changes.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Acked-by: Andrea Righi <arighi@nvidia.com>
+Stable-dep-of: efeeaac9ae97 ("sched_ext: Sync error_irq_work before freeing scx_sched")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/build_policy.c | 1 +
+ kernel/sched/ext.c | 1034 ----------------------------------
+ kernel/sched/ext.h | 23 -
+ kernel/sched/ext_internal.h | 1061 +++++++++++++++++++++++++++++++++++
+ 4 files changed, 1062 insertions(+), 1057 deletions(-)
+ create mode 100644 kernel/sched/ext_internal.h
+
+diff --git a/kernel/sched/build_policy.c b/kernel/sched/build_policy.c
+index c4a488e67aa7d..755883faf7518 100644
+--- a/kernel/sched/build_policy.c
++++ b/kernel/sched/build_policy.c
+@@ -58,6 +58,7 @@
+ #include "deadline.c"
+
+ #ifdef CONFIG_SCHED_CLASS_EXT
++# include "ext_internal.h"
+ # include "ext.c"
+ # include "ext_idle.c"
+ #endif
+diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
+index 088ceff38c8a4..8ecde1abb4e28 100644
+--- a/kernel/sched/ext.c
++++ b/kernel/sched/ext.c
+@@ -9,1040 +9,6 @@
+ #include <linux/btf_ids.h>
+ #include "ext_idle.h"
+
+-#define SCX_OP_IDX(op) (offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void)))
+-
+-enum scx_consts {
+- SCX_DSP_DFL_MAX_BATCH = 32,
+- SCX_DSP_MAX_LOOPS = 32,
+- SCX_WATCHDOG_MAX_TIMEOUT = 30 * HZ,
+-
+- SCX_EXIT_BT_LEN = 64,
+- SCX_EXIT_MSG_LEN = 1024,
+- SCX_EXIT_DUMP_DFL_LEN = 32768,
+-
+- SCX_CPUPERF_ONE = SCHED_CAPACITY_SCALE,
+-
+- /*
+- * Iterating all tasks may take a while. Periodically drop
+- * scx_tasks_lock to avoid causing e.g. CSD and RCU stalls.
+- */
+- SCX_TASK_ITER_BATCH = 32,
+-};
+-
+-enum scx_exit_kind {
+- SCX_EXIT_NONE,
+- SCX_EXIT_DONE,
+-
+- SCX_EXIT_UNREG = 64, /* user-space initiated unregistration */
+- SCX_EXIT_UNREG_BPF, /* BPF-initiated unregistration */
+- SCX_EXIT_UNREG_KERN, /* kernel-initiated unregistration */
+- SCX_EXIT_SYSRQ, /* requested by 'S' sysrq */
+-
+- SCX_EXIT_ERROR = 1024, /* runtime error, error msg contains details */
+- SCX_EXIT_ERROR_BPF, /* ERROR but triggered through scx_bpf_error() */
+- SCX_EXIT_ERROR_STALL, /* watchdog detected stalled runnable tasks */
+-};
+-
+-/*
+- * An exit code can be specified when exiting with scx_bpf_exit() or scx_exit(),
+- * corresponding to exit_kind UNREG_BPF and UNREG_KERN respectively. The codes
+- * are 64bit of the format:
+- *
+- * Bits: [63 .. 48 47 .. 32 31 .. 0]
+- * [ SYS ACT ] [ SYS RSN ] [ USR ]
+- *
+- * SYS ACT: System-defined exit actions
+- * SYS RSN: System-defined exit reasons
+- * USR : User-defined exit codes and reasons
+- *
+- * Using the above, users may communicate intention and context by ORing system
+- * actions and/or system reasons with a user-defined exit code.
+- */
+-enum scx_exit_code {
+- /* Reasons */
+- SCX_ECODE_RSN_HOTPLUG = 1LLU << 32,
+-
+- /* Actions */
+- SCX_ECODE_ACT_RESTART = 1LLU << 48,
+-};
+-
+-/*
+- * scx_exit_info is passed to ops.exit() to describe why the BPF scheduler is
+- * being disabled.
+- */
+-struct scx_exit_info {
+- /* %SCX_EXIT_* - broad category of the exit reason */
+- enum scx_exit_kind kind;
+-
+- /* exit code if gracefully exiting */
+- s64 exit_code;
+-
+- /* textual representation of the above */
+- const char *reason;
+-
+- /* backtrace if exiting due to an error */
+- unsigned long *bt;
+- u32 bt_len;
+-
+- /* informational message */
+- char *msg;
+-
+- /* debug dump */
+- char *dump;
+-};
+-
+-/* sched_ext_ops.flags */
+-enum scx_ops_flags {
+- /*
+- * Keep built-in idle tracking even if ops.update_idle() is implemented.
+- */
+- SCX_OPS_KEEP_BUILTIN_IDLE = 1LLU << 0,
+-
+- /*
+- * By default, if there are no other task to run on the CPU, ext core
+- * keeps running the current task even after its slice expires. If this
+- * flag is specified, such tasks are passed to ops.enqueue() with
+- * %SCX_ENQ_LAST. See the comment above %SCX_ENQ_LAST for more info.
+- */
+- SCX_OPS_ENQ_LAST = 1LLU << 1,
+-
+- /*
+- * An exiting task may schedule after PF_EXITING is set. In such cases,
+- * bpf_task_from_pid() may not be able to find the task and if the BPF
+- * scheduler depends on pid lookup for dispatching, the task will be
+- * lost leading to various issues including RCU grace period stalls.
+- *
+- * To mask this problem, by default, unhashed tasks are automatically
+- * dispatched to the local DSQ on enqueue. If the BPF scheduler doesn't
+- * depend on pid lookups and wants to handle these tasks directly, the
+- * following flag can be used.
+- */
+- SCX_OPS_ENQ_EXITING = 1LLU << 2,
+-
+- /*
+- * If set, only tasks with policy set to SCHED_EXT are attached to
+- * sched_ext. If clear, SCHED_NORMAL tasks are also included.
+- */
+- SCX_OPS_SWITCH_PARTIAL = 1LLU << 3,
+-
+- /*
+- * A migration disabled task can only execute on its current CPU. By
+- * default, such tasks are automatically put on the CPU's local DSQ with
+- * the default slice on enqueue. If this ops flag is set, they also go
+- * through ops.enqueue().
+- *
+- * A migration disabled task never invokes ops.select_cpu() as it can
+- * only select the current CPU. Also, p->cpus_ptr will only contain its
+- * current CPU while p->nr_cpus_allowed keeps tracking p->user_cpus_ptr
+- * and thus may disagree with cpumask_weight(p->cpus_ptr).
+- */
+- SCX_OPS_ENQ_MIGRATION_DISABLED = 1LLU << 4,
+-
+- /*
+- * Queued wakeup (ttwu_queue) is a wakeup optimization that invokes
+- * ops.enqueue() on the ops.select_cpu() selected or the wakee's
+- * previous CPU via IPI (inter-processor interrupt) to reduce cacheline
+- * transfers. When this optimization is enabled, ops.select_cpu() is
+- * skipped in some cases (when racing against the wakee switching out).
+- * As the BPF scheduler may depend on ops.select_cpu() being invoked
+- * during wakeups, queued wakeup is disabled by default.
+- *
+- * If this ops flag is set, queued wakeup optimization is enabled and
+- * the BPF scheduler must be able to handle ops.enqueue() invoked on the
+- * wakee's CPU without preceding ops.select_cpu() even for tasks which
+- * may be executed on multiple CPUs.
+- */
+- SCX_OPS_ALLOW_QUEUED_WAKEUP = 1LLU << 5,
+-
+- /*
+- * If set, enable per-node idle cpumasks. If clear, use a single global
+- * flat idle cpumask.
+- */
+- SCX_OPS_BUILTIN_IDLE_PER_NODE = 1LLU << 6,
+-
+- /*
+- * CPU cgroup support flags
+- */
+- SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* DEPRECATED, will be removed on 6.18 */
+-
+- SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE |
+- SCX_OPS_ENQ_LAST |
+- SCX_OPS_ENQ_EXITING |
+- SCX_OPS_ENQ_MIGRATION_DISABLED |
+- SCX_OPS_ALLOW_QUEUED_WAKEUP |
+- SCX_OPS_SWITCH_PARTIAL |
+- SCX_OPS_BUILTIN_IDLE_PER_NODE |
+- SCX_OPS_HAS_CGROUP_WEIGHT,
+-
+- /* high 8 bits are internal, don't include in SCX_OPS_ALL_FLAGS */
+- __SCX_OPS_INTERNAL_MASK = 0xffLLU << 56,
+-
+- SCX_OPS_HAS_CPU_PREEMPT = 1LLU << 56,
+-};
+-
+-/* argument container for ops.init_task() */
+-struct scx_init_task_args {
+- /*
+- * Set if ops.init_task() is being invoked on the fork path, as opposed
+- * to the scheduler transition path.
+- */
+- bool fork;
+-#ifdef CONFIG_EXT_GROUP_SCHED
+- /* the cgroup the task is joining */
+- struct cgroup *cgroup;
+-#endif
+-};
+-
+-/* argument container for ops.exit_task() */
+-struct scx_exit_task_args {
+- /* Whether the task exited before running on sched_ext. */
+- bool cancelled;
+-};
+-
+-/* argument container for ops->cgroup_init() */
+-struct scx_cgroup_init_args {
+- /* the weight of the cgroup [1..10000] */
+- u32 weight;
+-
+- /* bandwidth control parameters from cpu.max and cpu.max.burst */
+- u64 bw_period_us;
+- u64 bw_quota_us;
+- u64 bw_burst_us;
+-};
+-
+-enum scx_cpu_preempt_reason {
+- /* next task is being scheduled by &sched_class_rt */
+- SCX_CPU_PREEMPT_RT,
+- /* next task is being scheduled by &sched_class_dl */
+- SCX_CPU_PREEMPT_DL,
+- /* next task is being scheduled by &sched_class_stop */
+- SCX_CPU_PREEMPT_STOP,
+- /* unknown reason for SCX being preempted */
+- SCX_CPU_PREEMPT_UNKNOWN,
+-};
+-
+-/*
+- * Argument container for ops->cpu_acquire(). Currently empty, but may be
+- * expanded in the future.
+- */
+-struct scx_cpu_acquire_args {};
+-
+-/* argument container for ops->cpu_release() */
+-struct scx_cpu_release_args {
+- /* the reason the CPU was preempted */
+- enum scx_cpu_preempt_reason reason;
+-
+- /* the task that's going to be scheduled on the CPU */
+- struct task_struct *task;
+-};
+-
+-/*
+- * Informational context provided to dump operations.
+- */
+-struct scx_dump_ctx {
+- enum scx_exit_kind kind;
+- s64 exit_code;
+- const char *reason;
+- u64 at_ns;
+- u64 at_jiffies;
+-};
+-
+-/**
+- * struct sched_ext_ops - Operation table for BPF scheduler implementation
+- *
+- * A BPF scheduler can implement an arbitrary scheduling policy by
+- * implementing and loading operations in this table. Note that a userland
+- * scheduling policy can also be implemented using the BPF scheduler
+- * as a shim layer.
+- */
+-struct sched_ext_ops {
+- /**
+- * @select_cpu: Pick the target CPU for a task which is being woken up
+- * @p: task being woken up
+- * @prev_cpu: the cpu @p was on before sleeping
+- * @wake_flags: SCX_WAKE_*
+- *
+- * Decision made here isn't final. @p may be moved to any CPU while it
+- * is getting dispatched for execution later. However, as @p is not on
+- * the rq at this point, getting the eventual execution CPU right here
+- * saves a small bit of overhead down the line.
+- *
+- * If an idle CPU is returned, the CPU is kicked and will try to
+- * dispatch. While an explicit custom mechanism can be added,
+- * select_cpu() serves as the default way to wake up idle CPUs.
+- *
+- * @p may be inserted into a DSQ directly by calling
+- * scx_bpf_dsq_insert(). If so, the ops.enqueue() will be skipped.
+- * Directly inserting into %SCX_DSQ_LOCAL will put @p in the local DSQ
+- * of the CPU returned by this operation.
+- *
+- * Note that select_cpu() is never called for tasks that can only run
+- * on a single CPU or tasks with migration disabled, as they don't have
+- * the option to select a different CPU. See select_task_rq() for
+- * details.
+- */
+- s32 (*select_cpu)(struct task_struct *p, s32 prev_cpu, u64 wake_flags);
+-
+- /**
+- * @enqueue: Enqueue a task on the BPF scheduler
+- * @p: task being enqueued
+- * @enq_flags: %SCX_ENQ_*
+- *
+- * @p is ready to run. Insert directly into a DSQ by calling
+- * scx_bpf_dsq_insert() or enqueue on the BPF scheduler. If not directly
+- * inserted, the bpf scheduler owns @p and if it fails to dispatch @p,
+- * the task will stall.
+- *
+- * If @p was inserted into a DSQ from ops.select_cpu(), this callback is
+- * skipped.
+- */
+- void (*enqueue)(struct task_struct *p, u64 enq_flags);
+-
+- /**
+- * @dequeue: Remove a task from the BPF scheduler
+- * @p: task being dequeued
+- * @deq_flags: %SCX_DEQ_*
+- *
+- * Remove @p from the BPF scheduler. This is usually called to isolate
+- * the task while updating its scheduling properties (e.g. priority).
+- *
+- * The ext core keeps track of whether the BPF side owns a given task or
+- * not and can gracefully ignore spurious dispatches from BPF side,
+- * which makes it safe to not implement this method. However, depending
+- * on the scheduling logic, this can lead to confusing behaviors - e.g.
+- * scheduling position not being updated across a priority change.
+- */
+- void (*dequeue)(struct task_struct *p, u64 deq_flags);
+-
+- /**
+- * @dispatch: Dispatch tasks from the BPF scheduler and/or user DSQs
+- * @cpu: CPU to dispatch tasks for
+- * @prev: previous task being switched out
+- *
+- * Called when a CPU's local dsq is empty. The operation should dispatch
+- * one or more tasks from the BPF scheduler into the DSQs using
+- * scx_bpf_dsq_insert() and/or move from user DSQs into the local DSQ
+- * using scx_bpf_dsq_move_to_local().
+- *
+- * The maximum number of times scx_bpf_dsq_insert() can be called
+- * without an intervening scx_bpf_dsq_move_to_local() is specified by
+- * ops.dispatch_max_batch. See the comments on top of the two functions
+- * for more details.
+- *
+- * When not %NULL, @prev is an SCX task with its slice depleted. If
+- * @prev is still runnable as indicated by set %SCX_TASK_QUEUED in
+- * @prev->scx.flags, it is not enqueued yet and will be enqueued after
+- * ops.dispatch() returns. To keep executing @prev, return without
+- * dispatching or moving any tasks. Also see %SCX_OPS_ENQ_LAST.
+- */
+- void (*dispatch)(s32 cpu, struct task_struct *prev);
+-
+- /**
+- * @tick: Periodic tick
+- * @p: task running currently
+- *
+- * This operation is called every 1/HZ seconds on CPUs which are
+- * executing an SCX task. Setting @p->scx.slice to 0 will trigger an
+- * immediate dispatch cycle on the CPU.
+- */
+- void (*tick)(struct task_struct *p);
+-
+- /**
+- * @runnable: A task is becoming runnable on its associated CPU
+- * @p: task becoming runnable
+- * @enq_flags: %SCX_ENQ_*
+- *
+- * This and the following three functions can be used to track a task's
+- * execution state transitions. A task becomes ->runnable() on a CPU,
+- * and then goes through one or more ->running() and ->stopping() pairs
+- * as it runs on the CPU, and eventually becomes ->quiescent() when it's
+- * done running on the CPU.
+- *
+- * @p is becoming runnable on the CPU because it's
+- *
+- * - waking up (%SCX_ENQ_WAKEUP)
+- * - being moved from another CPU
+- * - being restored after temporarily taken off the queue for an
+- * attribute change.
+- *
+- * This and ->enqueue() are related but not coupled. This operation
+- * notifies @p's state transition and may not be followed by ->enqueue()
+- * e.g. when @p is being dispatched to a remote CPU, or when @p is
+- * being enqueued on a CPU experiencing a hotplug event. Likewise, a
+- * task may be ->enqueue()'d without being preceded by this operation
+- * e.g. after exhausting its slice.
+- */
+- void (*runnable)(struct task_struct *p, u64 enq_flags);
+-
+- /**
+- * @running: A task is starting to run on its associated CPU
+- * @p: task starting to run
+- *
+- * Note that this callback may be called from a CPU other than the
+- * one the task is going to run on. This can happen when a task
+- * property is changed (i.e., affinity), since scx_next_task_scx(),
+- * which triggers this callback, may run on a CPU different from
+- * the task's assigned CPU.
+- *
+- * Therefore, always use scx_bpf_task_cpu(@p) to determine the
+- * target CPU the task is going to use.
+- *
+- * See ->runnable() for explanation on the task state notifiers.
+- */
+- void (*running)(struct task_struct *p);
+-
+- /**
+- * @stopping: A task is stopping execution
+- * @p: task stopping to run
+- * @runnable: is task @p still runnable?
+- *
+- * Note that this callback may be called from a CPU other than the
+- * one the task was running on. This can happen when a task
+- * property is changed (i.e., affinity), since dequeue_task_scx(),
+- * which triggers this callback, may run on a CPU different from
+- * the task's assigned CPU.
+- *
+- * Therefore, always use scx_bpf_task_cpu(@p) to retrieve the CPU
+- * the task was running on.
+- *
+- * See ->runnable() for explanation on the task state notifiers. If
+- * !@runnable, ->quiescent() will be invoked after this operation
+- * returns.
+- */
+- void (*stopping)(struct task_struct *p, bool runnable);
+-
+- /**
+- * @quiescent: A task is becoming not runnable on its associated CPU
+- * @p: task becoming not runnable
+- * @deq_flags: %SCX_DEQ_*
+- *
+- * See ->runnable() for explanation on the task state notifiers.
+- *
+- * @p is becoming quiescent on the CPU because it's
+- *
+- * - sleeping (%SCX_DEQ_SLEEP)
+- * - being moved to another CPU
+- * - being temporarily taken off the queue for an attribute change
+- * (%SCX_DEQ_SAVE)
+- *
+- * This and ->dequeue() are related but not coupled. This operation
+- * notifies @p's state transition and may not be preceded by ->dequeue()
+- * e.g. when @p is being dispatched to a remote CPU.
+- */
+- void (*quiescent)(struct task_struct *p, u64 deq_flags);
+-
+- /**
+- * @yield: Yield CPU
+- * @from: yielding task
+- * @to: optional yield target task
+- *
+- * If @to is NULL, @from is yielding the CPU to other runnable tasks.
+- * The BPF scheduler should ensure that other available tasks are
+- * dispatched before the yielding task. Return value is ignored in this
+- * case.
+- *
+- * If @to is not-NULL, @from wants to yield the CPU to @to. If the bpf
+- * scheduler can implement the request, return %true; otherwise, %false.
+- */
+- bool (*yield)(struct task_struct *from, struct task_struct *to);
+-
+- /**
+- * @core_sched_before: Task ordering for core-sched
+- * @a: task A
+- * @b: task B
+- *
+- * Used by core-sched to determine the ordering between two tasks. See
+- * Documentation/admin-guide/hw-vuln/core-scheduling.rst for details on
+- * core-sched.
+- *
+- * Both @a and @b are runnable and may or may not currently be queued on
+- * the BPF scheduler. Should return %true if @a should run before @b.
+- * %false if there's no required ordering or @b should run before @a.
+- *
+- * If not specified, the default is ordering them according to when they
+- * became runnable.
+- */
+- bool (*core_sched_before)(struct task_struct *a, struct task_struct *b);
+-
+- /**
+- * @set_weight: Set task weight
+- * @p: task to set weight for
+- * @weight: new weight [1..10000]
+- *
+- * Update @p's weight to @weight.
+- */
+- void (*set_weight)(struct task_struct *p, u32 weight);
+-
+- /**
+- * @set_cpumask: Set CPU affinity
+- * @p: task to set CPU affinity for
+- * @cpumask: cpumask of cpus that @p can run on
+- *
+- * Update @p's CPU affinity to @cpumask.
+- */
+- void (*set_cpumask)(struct task_struct *p,
+- const struct cpumask *cpumask);
+-
+- /**
+- * @update_idle: Update the idle state of a CPU
+- * @cpu: CPU to update the idle state for
+- * @idle: whether entering or exiting the idle state
+- *
+- * This operation is called when @rq's CPU goes or leaves the idle
+- * state. By default, implementing this operation disables the built-in
+- * idle CPU tracking and the following helpers become unavailable:
+- *
+- * - scx_bpf_select_cpu_dfl()
+- * - scx_bpf_select_cpu_and()
+- * - scx_bpf_test_and_clear_cpu_idle()
+- * - scx_bpf_pick_idle_cpu()
+- *
+- * The user also must implement ops.select_cpu() as the default
+- * implementation relies on scx_bpf_select_cpu_dfl().
+- *
+- * Specify the %SCX_OPS_KEEP_BUILTIN_IDLE flag to keep the built-in idle
+- * tracking.
+- */
+- void (*update_idle)(s32 cpu, bool idle);
+-
+- /**
+- * @cpu_acquire: A CPU is becoming available to the BPF scheduler
+- * @cpu: The CPU being acquired by the BPF scheduler.
+- * @args: Acquire arguments, see the struct definition.
+- *
+- * A CPU that was previously released from the BPF scheduler is now once
+- * again under its control.
+- */
+- void (*cpu_acquire)(s32 cpu, struct scx_cpu_acquire_args *args);
+-
+- /**
+- * @cpu_release: A CPU is taken away from the BPF scheduler
+- * @cpu: The CPU being released by the BPF scheduler.
+- * @args: Release arguments, see the struct definition.
+- *
+- * The specified CPU is no longer under the control of the BPF
+- * scheduler. This could be because it was preempted by a higher
+- * priority sched_class, though there may be other reasons as well. The
+- * caller should consult @args->reason to determine the cause.
+- */
+- void (*cpu_release)(s32 cpu, struct scx_cpu_release_args *args);
+-
+- /**
+- * @init_task: Initialize a task to run in a BPF scheduler
+- * @p: task to initialize for BPF scheduling
+- * @args: init arguments, see the struct definition
+- *
+- * Either we're loading a BPF scheduler or a new task is being forked.
+- * Initialize @p for BPF scheduling. This operation may block and can
+- * be used for allocations, and is called exactly once for a task.
+- *
+- * Return 0 for success, -errno for failure. An error return while
+- * loading will abort loading of the BPF scheduler. During a fork, it
+- * will abort that specific fork.
+- */
+- s32 (*init_task)(struct task_struct *p, struct scx_init_task_args *args);
+-
+- /**
+- * @exit_task: Exit a previously-running task from the system
+- * @p: task to exit
+- * @args: exit arguments, see the struct definition
+- *
+- * @p is exiting or the BPF scheduler is being unloaded. Perform any
+- * necessary cleanup for @p.
+- */
+- void (*exit_task)(struct task_struct *p, struct scx_exit_task_args *args);
+-
+- /**
+- * @enable: Enable BPF scheduling for a task
+- * @p: task to enable BPF scheduling for
+- *
+- * Enable @p for BPF scheduling. enable() is called on @p any time it
+- * enters SCX, and is always paired with a matching disable().
+- */
+- void (*enable)(struct task_struct *p);
+-
+- /**
+- * @disable: Disable BPF scheduling for a task
+- * @p: task to disable BPF scheduling for
+- *
+- * @p is exiting, leaving SCX or the BPF scheduler is being unloaded.
+- * Disable BPF scheduling for @p. A disable() call is always matched
+- * with a prior enable() call.
+- */
+- void (*disable)(struct task_struct *p);
+-
+- /**
+- * @dump: Dump BPF scheduler state on error
+- * @ctx: debug dump context
+- *
+- * Use scx_bpf_dump() to generate BPF scheduler specific debug dump.
+- */
+- void (*dump)(struct scx_dump_ctx *ctx);
+-
+- /**
+- * @dump_cpu: Dump BPF scheduler state for a CPU on error
+- * @ctx: debug dump context
+- * @cpu: CPU to generate debug dump for
+- * @idle: @cpu is currently idle without any runnable tasks
+- *
+- * Use scx_bpf_dump() to generate BPF scheduler specific debug dump for
+- * @cpu. If @idle is %true and this operation doesn't produce any
+- * output, @cpu is skipped for dump.
+- */
+- void (*dump_cpu)(struct scx_dump_ctx *ctx, s32 cpu, bool idle);
+-
+- /**
+- * @dump_task: Dump BPF scheduler state for a runnable task on error
+- * @ctx: debug dump context
+- * @p: runnable task to generate debug dump for
+- *
+- * Use scx_bpf_dump() to generate BPF scheduler specific debug dump for
+- * @p.
+- */
+- void (*dump_task)(struct scx_dump_ctx *ctx, struct task_struct *p);
+-
+-#ifdef CONFIG_EXT_GROUP_SCHED
+- /**
+- * @cgroup_init: Initialize a cgroup
+- * @cgrp: cgroup being initialized
+- * @args: init arguments, see the struct definition
+- *
+- * Either the BPF scheduler is being loaded or @cgrp created, initialize
+- * @cgrp for sched_ext. This operation may block.
+- *
+- * Return 0 for success, -errno for failure. An error return while
+- * loading will abort loading of the BPF scheduler. During cgroup
+- * creation, it will abort the specific cgroup creation.
+- */
+- s32 (*cgroup_init)(struct cgroup *cgrp,
+- struct scx_cgroup_init_args *args);
+-
+- /**
+- * @cgroup_exit: Exit a cgroup
+- * @cgrp: cgroup being exited
+- *
+- * Either the BPF scheduler is being unloaded or @cgrp destroyed, exit
+- * @cgrp for sched_ext. This operation my block.
+- */
+- void (*cgroup_exit)(struct cgroup *cgrp);
+-
+- /**
+- * @cgroup_prep_move: Prepare a task to be moved to a different cgroup
+- * @p: task being moved
+- * @from: cgroup @p is being moved from
+- * @to: cgroup @p is being moved to
+- *
+- * Prepare @p for move from cgroup @from to @to. This operation may
+- * block and can be used for allocations.
+- *
+- * Return 0 for success, -errno for failure. An error return aborts the
+- * migration.
+- */
+- s32 (*cgroup_prep_move)(struct task_struct *p,
+- struct cgroup *from, struct cgroup *to);
+-
+- /**
+- * @cgroup_move: Commit cgroup move
+- * @p: task being moved
+- * @from: cgroup @p is being moved from
+- * @to: cgroup @p is being moved to
+- *
+- * Commit the move. @p is dequeued during this operation.
+- */
+- void (*cgroup_move)(struct task_struct *p,
+- struct cgroup *from, struct cgroup *to);
+-
+- /**
+- * @cgroup_cancel_move: Cancel cgroup move
+- * @p: task whose cgroup move is being canceled
+- * @from: cgroup @p was being moved from
+- * @to: cgroup @p was being moved to
+- *
+- * @p was cgroup_prep_move()'d but failed before reaching cgroup_move().
+- * Undo the preparation.
+- */
+- void (*cgroup_cancel_move)(struct task_struct *p,
+- struct cgroup *from, struct cgroup *to);
+-
+- /**
+- * @cgroup_set_weight: A cgroup's weight is being changed
+- * @cgrp: cgroup whose weight is being updated
+- * @weight: new weight [1..10000]
+- *
+- * Update @cgrp's weight to @weight.
+- */
+- void (*cgroup_set_weight)(struct cgroup *cgrp, u32 weight);
+-
+- /**
+- * @cgroup_set_bandwidth: A cgroup's bandwidth is being changed
+- * @cgrp: cgroup whose bandwidth is being updated
+- * @period_us: bandwidth control period
+- * @quota_us: bandwidth control quota
+- * @burst_us: bandwidth control burst
+- *
+- * Update @cgrp's bandwidth control parameters. This is from the cpu.max
+- * cgroup interface.
+- *
+- * @quota_us / @period_us determines the CPU bandwidth @cgrp is entitled
+- * to. For example, if @period_us is 1_000_000 and @quota_us is
+- * 2_500_000. @cgrp is entitled to 2.5 CPUs. @burst_us can be
+- * interpreted in the same fashion and specifies how much @cgrp can
+- * burst temporarily. The specific control mechanism and thus the
+- * interpretation of @period_us and burstiness is upto to the BPF
+- * scheduler.
+- */
+- void (*cgroup_set_bandwidth)(struct cgroup *cgrp,
+- u64 period_us, u64 quota_us, u64 burst_us);
+-
+-#endif /* CONFIG_EXT_GROUP_SCHED */
+-
+- /*
+- * All online ops must come before ops.cpu_online().
+- */
+-
+- /**
+- * @cpu_online: A CPU became online
+- * @cpu: CPU which just came up
+- *
+- * @cpu just came online. @cpu will not call ops.enqueue() or
+- * ops.dispatch(), nor run tasks associated with other CPUs beforehand.
+- */
+- void (*cpu_online)(s32 cpu);
+-
+- /**
+- * @cpu_offline: A CPU is going offline
+- * @cpu: CPU which is going offline
+- *
+- * @cpu is going offline. @cpu will not call ops.enqueue() or
+- * ops.dispatch(), nor run tasks associated with other CPUs afterwards.
+- */
+- void (*cpu_offline)(s32 cpu);
+-
+- /*
+- * All CPU hotplug ops must come before ops.init().
+- */
+-
+- /**
+- * @init: Initialize the BPF scheduler
+- */
+- s32 (*init)(void);
+-
+- /**
+- * @exit: Clean up after the BPF scheduler
+- * @info: Exit info
+- *
+- * ops.exit() is also called on ops.init() failure, which is a bit
+- * unusual. This is to allow rich reporting through @info on how
+- * ops.init() failed.
+- */
+- void (*exit)(struct scx_exit_info *info);
+-
+- /**
+- * @dispatch_max_batch: Max nr of tasks that dispatch() can dispatch
+- */
+- u32 dispatch_max_batch;
+-
+- /**
+- * @flags: %SCX_OPS_* flags
+- */
+- u64 flags;
+-
+- /**
+- * @timeout_ms: The maximum amount of time, in milliseconds, that a
+- * runnable task should be able to wait before being scheduled. The
+- * maximum timeout may not exceed the default timeout of 30 seconds.
+- *
+- * Defaults to the maximum allowed timeout value of 30 seconds.
+- */
+- u32 timeout_ms;
+-
+- /**
+- * @exit_dump_len: scx_exit_info.dump buffer length. If 0, the default
+- * value of 32768 is used.
+- */
+- u32 exit_dump_len;
+-
+- /**
+- * @hotplug_seq: A sequence number that may be set by the scheduler to
+- * detect when a hotplug event has occurred during the loading process.
+- * If 0, no detection occurs. Otherwise, the scheduler will fail to
+- * load if the sequence number does not match @scx_hotplug_seq on the
+- * enable path.
+- */
+- u64 hotplug_seq;
+-
+- /**
+- * @name: BPF scheduler's name
+- *
+- * Must be a non-zero valid BPF object name including only isalnum(),
+- * '_' and '.' chars. Shows up in kernel.sched_ext_ops sysctl while the
+- * BPF scheduler is enabled.
+- */
+- char name[SCX_OPS_NAME_LEN];
+-
+- /* internal use only, must be NULL */
+- void *priv;
+-};
+-
+-enum scx_opi {
+- SCX_OPI_BEGIN = 0,
+- SCX_OPI_NORMAL_BEGIN = 0,
+- SCX_OPI_NORMAL_END = SCX_OP_IDX(cpu_online),
+- SCX_OPI_CPU_HOTPLUG_BEGIN = SCX_OP_IDX(cpu_online),
+- SCX_OPI_CPU_HOTPLUG_END = SCX_OP_IDX(init),
+- SCX_OPI_END = SCX_OP_IDX(init),
+-};
+-
+-/*
+- * Collection of event counters. Event types are placed in descending order.
+- */
+-struct scx_event_stats {
+- /*
+- * If ops.select_cpu() returns a CPU which can't be used by the task,
+- * the core scheduler code silently picks a fallback CPU.
+- */
+- s64 SCX_EV_SELECT_CPU_FALLBACK;
+-
+- /*
+- * When dispatching to a local DSQ, the CPU may have gone offline in
+- * the meantime. In this case, the task is bounced to the global DSQ.
+- */
+- s64 SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE;
+-
+- /*
+- * If SCX_OPS_ENQ_LAST is not set, the number of times that a task
+- * continued to run because there were no other tasks on the CPU.
+- */
+- s64 SCX_EV_DISPATCH_KEEP_LAST;
+-
+- /*
+- * If SCX_OPS_ENQ_EXITING is not set, the number of times that a task
+- * is dispatched to a local DSQ when exiting.
+- */
+- s64 SCX_EV_ENQ_SKIP_EXITING;
+-
+- /*
+- * If SCX_OPS_ENQ_MIGRATION_DISABLED is not set, the number of times a
+- * migration disabled task skips ops.enqueue() and is dispatched to its
+- * local DSQ.
+- */
+- s64 SCX_EV_ENQ_SKIP_MIGRATION_DISABLED;
+-
+- /*
+- * Total number of times a task's time slice was refilled with the
+- * default value (SCX_SLICE_DFL).
+- */
+- s64 SCX_EV_REFILL_SLICE_DFL;
+-
+- /*
+- * The total duration of bypass modes in nanoseconds.
+- */
+- s64 SCX_EV_BYPASS_DURATION;
+-
+- /*
+- * The number of tasks dispatched in the bypassing mode.
+- */
+- s64 SCX_EV_BYPASS_DISPATCH;
+-
+- /*
+- * The number of times the bypassing mode has been activated.
+- */
+- s64 SCX_EV_BYPASS_ACTIVATE;
+-};
+-
+-struct scx_sched {
+- struct sched_ext_ops ops;
+- DECLARE_BITMAP(has_op, SCX_OPI_END);
+-
+- /*
+- * Dispatch queues.
+- *
+- * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability.
+- * This is to avoid live-locking in bypass mode where all tasks are
+- * dispatched to %SCX_DSQ_GLOBAL and all CPUs consume from it. If
+- * per-node split isn't sufficient, it can be further split.
+- */
+- struct rhashtable dsq_hash;
+- struct scx_dispatch_q **global_dsqs;
+-
+- /*
+- * The event counters are in a per-CPU variable to minimize the
+- * accounting overhead. A system-wide view on the event counter is
+- * constructed when requested by scx_bpf_events().
+- */
+- struct scx_event_stats __percpu *event_stats_cpu;
+-
+- bool warned_zero_slice;
+-
+- atomic_t exit_kind;
+- struct scx_exit_info *exit_info;
+-
+- struct kobject kobj;
+-
+- struct kthread_worker *helper;
+- struct irq_work error_irq_work;
+- struct kthread_work disable_work;
+- struct rcu_work rcu_work;
+-};
+-
+-enum scx_wake_flags {
+- /* expose select WF_* flags as enums */
+- SCX_WAKE_FORK = WF_FORK,
+- SCX_WAKE_TTWU = WF_TTWU,
+- SCX_WAKE_SYNC = WF_SYNC,
+-};
+-
+-enum scx_enq_flags {
+- /* expose select ENQUEUE_* flags as enums */
+- SCX_ENQ_WAKEUP = ENQUEUE_WAKEUP,
+- SCX_ENQ_HEAD = ENQUEUE_HEAD,
+- SCX_ENQ_CPU_SELECTED = ENQUEUE_RQ_SELECTED,
+-
+- /* high 32bits are SCX specific */
+-
+- /*
+- * Set the following to trigger preemption when calling
+- * scx_bpf_dsq_insert() with a local dsq as the target. The slice of the
+- * current task is cleared to zero and the CPU is kicked into the
+- * scheduling path. Implies %SCX_ENQ_HEAD.
+- */
+- SCX_ENQ_PREEMPT = 1LLU << 32,
+-
+- /*
+- * The task being enqueued was previously enqueued on the current CPU's
+- * %SCX_DSQ_LOCAL, but was removed from it in a call to the
+- * scx_bpf_reenqueue_local() kfunc. If scx_bpf_reenqueue_local() was
+- * invoked in a ->cpu_release() callback, and the task is again
+- * dispatched back to %SCX_LOCAL_DSQ by this current ->enqueue(), the
+- * task will not be scheduled on the CPU until at least the next invocation
+- * of the ->cpu_acquire() callback.
+- */
+- SCX_ENQ_REENQ = 1LLU << 40,
+-
+- /*
+- * The task being enqueued is the only task available for the cpu. By
+- * default, ext core keeps executing such tasks but when
+- * %SCX_OPS_ENQ_LAST is specified, they're ops.enqueue()'d with the
+- * %SCX_ENQ_LAST flag set.
+- *
+- * The BPF scheduler is responsible for triggering a follow-up
+- * scheduling event. Otherwise, Execution may stall.
+- */
+- SCX_ENQ_LAST = 1LLU << 41,
+-
+- /* high 8 bits are internal */
+- __SCX_ENQ_INTERNAL_MASK = 0xffLLU << 56,
+-
+- SCX_ENQ_CLEAR_OPSS = 1LLU << 56,
+- SCX_ENQ_DSQ_PRIQ = 1LLU << 57,
+-};
+-
+-enum scx_deq_flags {
+- /* expose select DEQUEUE_* flags as enums */
+- SCX_DEQ_SLEEP = DEQUEUE_SLEEP,
+-
+- /* high 32bits are SCX specific */
+-
+- /*
+- * The generic core-sched layer decided to execute the task even though
+- * it hasn't been dispatched yet. Dequeue from the BPF side.
+- */
+- SCX_DEQ_CORE_SCHED_EXEC = 1LLU << 32,
+-};
+-
+-enum scx_pick_idle_cpu_flags {
+- SCX_PICK_IDLE_CORE = 1LLU << 0, /* pick a CPU whose SMT siblings are also idle */
+- SCX_PICK_IDLE_IN_NODE = 1LLU << 1, /* pick a CPU in the same target NUMA node */
+-};
+-
+-enum scx_kick_flags {
+- /*
+- * Kick the target CPU if idle. Guarantees that the target CPU goes
+- * through at least one full scheduling cycle before going idle. If the
+- * target CPU can be determined to be currently not idle and going to go
+- * through a scheduling cycle before going idle, noop.
+- */
+- SCX_KICK_IDLE = 1LLU << 0,
+-
+- /*
+- * Preempt the current task and execute the dispatch path. If the
+- * current task of the target CPU is an SCX task, its ->scx.slice is
+- * cleared to zero before the scheduling path is invoked so that the
+- * task expires and the dispatch path is invoked.
+- */
+- SCX_KICK_PREEMPT = 1LLU << 1,
+-
+- /*
+- * Wait for the CPU to be rescheduled. The scx_bpf_kick_cpu() call will
+- * return after the target CPU finishes picking the next task.
+- */
+- SCX_KICK_WAIT = 1LLU << 2,
+-};
+-
+-enum scx_tg_flags {
+- SCX_TG_ONLINE = 1U << 0,
+- SCX_TG_INITED = 1U << 1,
+-};
+-
+-enum scx_enable_state {
+- SCX_ENABLING,
+- SCX_ENABLED,
+- SCX_DISABLING,
+- SCX_DISABLED,
+-};
+-
+-static const char *scx_enable_state_str[] = {
+- [SCX_ENABLING] = "enabling",
+- [SCX_ENABLED] = "enabled",
+- [SCX_DISABLING] = "disabling",
+- [SCX_DISABLED] = "disabled",
+-};
+-
+-/*
+- * sched_ext_entity->ops_state
+- *
+- * Used to track the task ownership between the SCX core and the BPF scheduler.
+- * State transitions look as follows:
+- *
+- * NONE -> QUEUEING -> QUEUED -> DISPATCHING
+- * ^ | |
+- * | v v
+- * \-------------------------------/
+- *
+- * QUEUEING and DISPATCHING states can be waited upon. See wait_ops_state() call
+- * sites for explanations on the conditions being waited upon and why they are
+- * safe. Transitions out of them into NONE or QUEUED must store_release and the
+- * waiters should load_acquire.
+- *
+- * Tracking scx_ops_state enables sched_ext core to reliably determine whether
+- * any given task can be dispatched by the BPF scheduler at all times and thus
+- * relaxes the requirements on the BPF scheduler. This allows the BPF scheduler
+- * to try to dispatch any task anytime regardless of its state as the SCX core
+- * can safely reject invalid dispatches.
+- */
+-enum scx_ops_state {
+- SCX_OPSS_NONE, /* owned by the SCX core */
+- SCX_OPSS_QUEUEING, /* in transit to the BPF scheduler */
+- SCX_OPSS_QUEUED, /* owned by the BPF scheduler */
+- SCX_OPSS_DISPATCHING, /* in transit back to the SCX core */
+-
+- /*
+- * QSEQ brands each QUEUED instance so that, when dispatch races
+- * dequeue/requeue, the dispatcher can tell whether it still has a claim
+- * on the task being dispatched.
+- *
+- * As some 32bit archs can't do 64bit store_release/load_acquire,
+- * p->scx.ops_state is atomic_long_t which leaves 30 bits for QSEQ on
+- * 32bit machines. The dispatch race window QSEQ protects is very narrow
+- * and runs with IRQ disabled. 30 bits should be sufficient.
+- */
+- SCX_OPSS_QSEQ_SHIFT = 2,
+-};
+-
+-/* Use macros to ensure that the type is unsigned long for the masks */
+-#define SCX_OPSS_STATE_MASK ((1LU << SCX_OPSS_QSEQ_SHIFT) - 1)
+-#define SCX_OPSS_QSEQ_MASK (~SCX_OPSS_STATE_MASK)
+-
+ /*
+ * NOTE: sched_ext is in the process of growing multiple scheduler support and
+ * scx_root usage is in a transitional state. Naked dereferences are safe if the
+diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h
+index 292bb41a242ec..33858607bc97f 100644
+--- a/kernel/sched/ext.h
++++ b/kernel/sched/ext.h
+@@ -8,29 +8,6 @@
+ */
+ #ifdef CONFIG_SCHED_CLASS_EXT
+
+-static inline bool scx_kf_allowed_if_unlocked(void)
+-{
+- return !current->scx.kf_mask;
+-}
+-
+-static inline bool scx_rq_bypassing(struct rq *rq)
+-{
+- return unlikely(rq->scx.flags & SCX_RQ_BYPASSING);
+-}
+-
+-DECLARE_STATIC_KEY_FALSE(scx_ops_allow_queued_wakeup);
+-
+-DECLARE_PER_CPU(struct rq *, scx_locked_rq_state);
+-
+-/*
+- * Return the rq currently locked from an scx callback, or NULL if no rq is
+- * locked.
+- */
+-static inline struct rq *scx_locked_rq(void)
+-{
+- return __this_cpu_read(scx_locked_rq_state);
+-}
+-
+ void scx_tick(struct rq *rq);
+ void init_scx_entity(struct sched_ext_entity *scx);
+ void scx_pre_fork(struct task_struct *p);
+diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
+new file mode 100644
+index 0000000000000..76690ede8700f
+--- /dev/null
++++ b/kernel/sched/ext_internal.h
+@@ -0,0 +1,1061 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst
++ *
++ * Copyright (c) 2025 Meta Platforms, Inc. and affiliates.
++ * Copyright (c) 2025 Tejun Heo <tj@kernel.org>
++ */
++#define SCX_OP_IDX(op) (offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void)))
++
++enum scx_consts {
++ SCX_DSP_DFL_MAX_BATCH = 32,
++ SCX_DSP_MAX_LOOPS = 32,
++ SCX_WATCHDOG_MAX_TIMEOUT = 30 * HZ,
++
++ SCX_EXIT_BT_LEN = 64,
++ SCX_EXIT_MSG_LEN = 1024,
++ SCX_EXIT_DUMP_DFL_LEN = 32768,
++
++ SCX_CPUPERF_ONE = SCHED_CAPACITY_SCALE,
++
++ /*
++ * Iterating all tasks may take a while. Periodically drop
++ * scx_tasks_lock to avoid causing e.g. CSD and RCU stalls.
++ */
++ SCX_TASK_ITER_BATCH = 32,
++};
++
++enum scx_exit_kind {
++ SCX_EXIT_NONE,
++ SCX_EXIT_DONE,
++
++ SCX_EXIT_UNREG = 64, /* user-space initiated unregistration */
++ SCX_EXIT_UNREG_BPF, /* BPF-initiated unregistration */
++ SCX_EXIT_UNREG_KERN, /* kernel-initiated unregistration */
++ SCX_EXIT_SYSRQ, /* requested by 'S' sysrq */
++
++ SCX_EXIT_ERROR = 1024, /* runtime error, error msg contains details */
++ SCX_EXIT_ERROR_BPF, /* ERROR but triggered through scx_bpf_error() */
++ SCX_EXIT_ERROR_STALL, /* watchdog detected stalled runnable tasks */
++};
++
++/*
++ * An exit code can be specified when exiting with scx_bpf_exit() or scx_exit(),
++ * corresponding to exit_kind UNREG_BPF and UNREG_KERN respectively. The codes
++ * are 64bit of the format:
++ *
++ * Bits: [63 .. 48 47 .. 32 31 .. 0]
++ * [ SYS ACT ] [ SYS RSN ] [ USR ]
++ *
++ * SYS ACT: System-defined exit actions
++ * SYS RSN: System-defined exit reasons
++ * USR : User-defined exit codes and reasons
++ *
++ * Using the above, users may communicate intention and context by ORing system
++ * actions and/or system reasons with a user-defined exit code.
++ */
++enum scx_exit_code {
++ /* Reasons */
++ SCX_ECODE_RSN_HOTPLUG = 1LLU << 32,
++
++ /* Actions */
++ SCX_ECODE_ACT_RESTART = 1LLU << 48,
++};
++
++/*
++ * scx_exit_info is passed to ops.exit() to describe why the BPF scheduler is
++ * being disabled.
++ */
++struct scx_exit_info {
++ /* %SCX_EXIT_* - broad category of the exit reason */
++ enum scx_exit_kind kind;
++
++ /* exit code if gracefully exiting */
++ s64 exit_code;
++
++ /* textual representation of the above */
++ const char *reason;
++
++ /* backtrace if exiting due to an error */
++ unsigned long *bt;
++ u32 bt_len;
++
++ /* informational message */
++ char *msg;
++
++ /* debug dump */
++ char *dump;
++};
++
++/* sched_ext_ops.flags */
++enum scx_ops_flags {
++ /*
++ * Keep built-in idle tracking even if ops.update_idle() is implemented.
++ */
++ SCX_OPS_KEEP_BUILTIN_IDLE = 1LLU << 0,
++
++ /*
++ * By default, if there are no other task to run on the CPU, ext core
++ * keeps running the current task even after its slice expires. If this
++ * flag is specified, such tasks are passed to ops.enqueue() with
++ * %SCX_ENQ_LAST. See the comment above %SCX_ENQ_LAST for more info.
++ */
++ SCX_OPS_ENQ_LAST = 1LLU << 1,
++
++ /*
++ * An exiting task may schedule after PF_EXITING is set. In such cases,
++ * bpf_task_from_pid() may not be able to find the task and if the BPF
++ * scheduler depends on pid lookup for dispatching, the task will be
++ * lost leading to various issues including RCU grace period stalls.
++ *
++ * To mask this problem, by default, unhashed tasks are automatically
++ * dispatched to the local DSQ on enqueue. If the BPF scheduler doesn't
++ * depend on pid lookups and wants to handle these tasks directly, the
++ * following flag can be used.
++ */
++ SCX_OPS_ENQ_EXITING = 1LLU << 2,
++
++ /*
++ * If set, only tasks with policy set to SCHED_EXT are attached to
++ * sched_ext. If clear, SCHED_NORMAL tasks are also included.
++ */
++ SCX_OPS_SWITCH_PARTIAL = 1LLU << 3,
++
++ /*
++ * A migration disabled task can only execute on its current CPU. By
++ * default, such tasks are automatically put on the CPU's local DSQ with
++ * the default slice on enqueue. If this ops flag is set, they also go
++ * through ops.enqueue().
++ *
++ * A migration disabled task never invokes ops.select_cpu() as it can
++ * only select the current CPU. Also, p->cpus_ptr will only contain its
++ * current CPU while p->nr_cpus_allowed keeps tracking p->user_cpus_ptr
++ * and thus may disagree with cpumask_weight(p->cpus_ptr).
++ */
++ SCX_OPS_ENQ_MIGRATION_DISABLED = 1LLU << 4,
++
++ /*
++ * Queued wakeup (ttwu_queue) is a wakeup optimization that invokes
++ * ops.enqueue() on the ops.select_cpu() selected or the wakee's
++ * previous CPU via IPI (inter-processor interrupt) to reduce cacheline
++ * transfers. When this optimization is enabled, ops.select_cpu() is
++ * skipped in some cases (when racing against the wakee switching out).
++ * As the BPF scheduler may depend on ops.select_cpu() being invoked
++ * during wakeups, queued wakeup is disabled by default.
++ *
++ * If this ops flag is set, queued wakeup optimization is enabled and
++ * the BPF scheduler must be able to handle ops.enqueue() invoked on the
++ * wakee's CPU without preceding ops.select_cpu() even for tasks which
++ * may be executed on multiple CPUs.
++ */
++ SCX_OPS_ALLOW_QUEUED_WAKEUP = 1LLU << 5,
++
++ /*
++ * If set, enable per-node idle cpumasks. If clear, use a single global
++ * flat idle cpumask.
++ */
++ SCX_OPS_BUILTIN_IDLE_PER_NODE = 1LLU << 6,
++
++ /*
++ * CPU cgroup support flags
++ */
++ SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* DEPRECATED, will be removed on 6.18 */
++
++ SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE |
++ SCX_OPS_ENQ_LAST |
++ SCX_OPS_ENQ_EXITING |
++ SCX_OPS_ENQ_MIGRATION_DISABLED |
++ SCX_OPS_ALLOW_QUEUED_WAKEUP |
++ SCX_OPS_SWITCH_PARTIAL |
++ SCX_OPS_BUILTIN_IDLE_PER_NODE |
++ SCX_OPS_HAS_CGROUP_WEIGHT,
++
++ /* high 8 bits are internal, don't include in SCX_OPS_ALL_FLAGS */
++ __SCX_OPS_INTERNAL_MASK = 0xffLLU << 56,
++
++ SCX_OPS_HAS_CPU_PREEMPT = 1LLU << 56,
++};
++
++/* argument container for ops.init_task() */
++struct scx_init_task_args {
++ /*
++ * Set if ops.init_task() is being invoked on the fork path, as opposed
++ * to the scheduler transition path.
++ */
++ bool fork;
++#ifdef CONFIG_EXT_GROUP_SCHED
++ /* the cgroup the task is joining */
++ struct cgroup *cgroup;
++#endif
++};
++
++/* argument container for ops.exit_task() */
++struct scx_exit_task_args {
++ /* Whether the task exited before running on sched_ext. */
++ bool cancelled;
++};
++
++/* argument container for ops->cgroup_init() */
++struct scx_cgroup_init_args {
++ /* the weight of the cgroup [1..10000] */
++ u32 weight;
++
++ /* bandwidth control parameters from cpu.max and cpu.max.burst */
++ u64 bw_period_us;
++ u64 bw_quota_us;
++ u64 bw_burst_us;
++};
++
++enum scx_cpu_preempt_reason {
++ /* next task is being scheduled by &sched_class_rt */
++ SCX_CPU_PREEMPT_RT,
++ /* next task is being scheduled by &sched_class_dl */
++ SCX_CPU_PREEMPT_DL,
++ /* next task is being scheduled by &sched_class_stop */
++ SCX_CPU_PREEMPT_STOP,
++ /* unknown reason for SCX being preempted */
++ SCX_CPU_PREEMPT_UNKNOWN,
++};
++
++/*
++ * Argument container for ops->cpu_acquire(). Currently empty, but may be
++ * expanded in the future.
++ */
++struct scx_cpu_acquire_args {};
++
++/* argument container for ops->cpu_release() */
++struct scx_cpu_release_args {
++ /* the reason the CPU was preempted */
++ enum scx_cpu_preempt_reason reason;
++
++ /* the task that's going to be scheduled on the CPU */
++ struct task_struct *task;
++};
++
++/*
++ * Informational context provided to dump operations.
++ */
++struct scx_dump_ctx {
++ enum scx_exit_kind kind;
++ s64 exit_code;
++ const char *reason;
++ u64 at_ns;
++ u64 at_jiffies;
++};
++
++/**
++ * struct sched_ext_ops - Operation table for BPF scheduler implementation
++ *
++ * A BPF scheduler can implement an arbitrary scheduling policy by
++ * implementing and loading operations in this table. Note that a userland
++ * scheduling policy can also be implemented using the BPF scheduler
++ * as a shim layer.
++ */
++struct sched_ext_ops {
++ /**
++ * @select_cpu: Pick the target CPU for a task which is being woken up
++ * @p: task being woken up
++ * @prev_cpu: the cpu @p was on before sleeping
++ * @wake_flags: SCX_WAKE_*
++ *
++ * Decision made here isn't final. @p may be moved to any CPU while it
++ * is getting dispatched for execution later. However, as @p is not on
++ * the rq at this point, getting the eventual execution CPU right here
++ * saves a small bit of overhead down the line.
++ *
++ * If an idle CPU is returned, the CPU is kicked and will try to
++ * dispatch. While an explicit custom mechanism can be added,
++ * select_cpu() serves as the default way to wake up idle CPUs.
++ *
++ * @p may be inserted into a DSQ directly by calling
++ * scx_bpf_dsq_insert(). If so, the ops.enqueue() will be skipped.
++ * Directly inserting into %SCX_DSQ_LOCAL will put @p in the local DSQ
++ * of the CPU returned by this operation.
++ *
++ * Note that select_cpu() is never called for tasks that can only run
++ * on a single CPU or tasks with migration disabled, as they don't have
++ * the option to select a different CPU. See select_task_rq() for
++ * details.
++ */
++ s32 (*select_cpu)(struct task_struct *p, s32 prev_cpu, u64 wake_flags);
++
++ /**
++ * @enqueue: Enqueue a task on the BPF scheduler
++ * @p: task being enqueued
++ * @enq_flags: %SCX_ENQ_*
++ *
++ * @p is ready to run. Insert directly into a DSQ by calling
++ * scx_bpf_dsq_insert() or enqueue on the BPF scheduler. If not directly
++ * inserted, the bpf scheduler owns @p and if it fails to dispatch @p,
++ * the task will stall.
++ *
++ * If @p was inserted into a DSQ from ops.select_cpu(), this callback is
++ * skipped.
++ */
++ void (*enqueue)(struct task_struct *p, u64 enq_flags);
++
++ /**
++ * @dequeue: Remove a task from the BPF scheduler
++ * @p: task being dequeued
++ * @deq_flags: %SCX_DEQ_*
++ *
++ * Remove @p from the BPF scheduler. This is usually called to isolate
++ * the task while updating its scheduling properties (e.g. priority).
++ *
++ * The ext core keeps track of whether the BPF side owns a given task or
++ * not and can gracefully ignore spurious dispatches from BPF side,
++ * which makes it safe to not implement this method. However, depending
++ * on the scheduling logic, this can lead to confusing behaviors - e.g.
++ * scheduling position not being updated across a priority change.
++ */
++ void (*dequeue)(struct task_struct *p, u64 deq_flags);
++
++ /**
++ * @dispatch: Dispatch tasks from the BPF scheduler and/or user DSQs
++ * @cpu: CPU to dispatch tasks for
++ * @prev: previous task being switched out
++ *
++ * Called when a CPU's local dsq is empty. The operation should dispatch
++ * one or more tasks from the BPF scheduler into the DSQs using
++ * scx_bpf_dsq_insert() and/or move from user DSQs into the local DSQ
++ * using scx_bpf_dsq_move_to_local().
++ *
++ * The maximum number of times scx_bpf_dsq_insert() can be called
++ * without an intervening scx_bpf_dsq_move_to_local() is specified by
++ * ops.dispatch_max_batch. See the comments on top of the two functions
++ * for more details.
++ *
++ * When not %NULL, @prev is an SCX task with its slice depleted. If
++ * @prev is still runnable as indicated by set %SCX_TASK_QUEUED in
++ * @prev->scx.flags, it is not enqueued yet and will be enqueued after
++ * ops.dispatch() returns. To keep executing @prev, return without
++ * dispatching or moving any tasks. Also see %SCX_OPS_ENQ_LAST.
++ */
++ void (*dispatch)(s32 cpu, struct task_struct *prev);
++
++ /**
++ * @tick: Periodic tick
++ * @p: task running currently
++ *
++ * This operation is called every 1/HZ seconds on CPUs which are
++ * executing an SCX task. Setting @p->scx.slice to 0 will trigger an
++ * immediate dispatch cycle on the CPU.
++ */
++ void (*tick)(struct task_struct *p);
++
++ /**
++ * @runnable: A task is becoming runnable on its associated CPU
++ * @p: task becoming runnable
++ * @enq_flags: %SCX_ENQ_*
++ *
++ * This and the following three functions can be used to track a task's
++ * execution state transitions. A task becomes ->runnable() on a CPU,
++ * and then goes through one or more ->running() and ->stopping() pairs
++ * as it runs on the CPU, and eventually becomes ->quiescent() when it's
++ * done running on the CPU.
++ *
++ * @p is becoming runnable on the CPU because it's
++ *
++ * - waking up (%SCX_ENQ_WAKEUP)
++ * - being moved from another CPU
++ * - being restored after temporarily taken off the queue for an
++ * attribute change.
++ *
++ * This and ->enqueue() are related but not coupled. This operation
++ * notifies @p's state transition and may not be followed by ->enqueue()
++ * e.g. when @p is being dispatched to a remote CPU, or when @p is
++ * being enqueued on a CPU experiencing a hotplug event. Likewise, a
++ * task may be ->enqueue()'d without being preceded by this operation
++ * e.g. after exhausting its slice.
++ */
++ void (*runnable)(struct task_struct *p, u64 enq_flags);
++
++ /**
++ * @running: A task is starting to run on its associated CPU
++ * @p: task starting to run
++ *
++ * Note that this callback may be called from a CPU other than the
++ * one the task is going to run on. This can happen when a task
++ * property is changed (e.g., affinity), since scx_next_task_scx(),
++ * which triggers this callback, may run on a CPU different from
++ * the task's assigned CPU.
++ *
++ * Therefore, always use scx_bpf_task_cpu(@p) to determine the
++ * target CPU the task is going to use.
++ *
++ * See ->runnable() for explanation on the task state notifiers.
++ */
++ void (*running)(struct task_struct *p);
++
++ /**
++ * @stopping: A task is stopping execution
++ * @p: task stopping to run
++ * @runnable: is task @p still runnable?
++ *
++ * Note that this callback may be called from a CPU other than the
++ * one the task was running on. This can happen when a task
++ * property is changed (e.g., affinity), since dequeue_task_scx(),
++ * which triggers this callback, may run on a CPU different from
++ * the task's assigned CPU.
++ *
++ * Therefore, always use scx_bpf_task_cpu(@p) to retrieve the CPU
++ * the task was running on.
++ *
++ * See ->runnable() for explanation on the task state notifiers. If
++ * !@runnable, ->quiescent() will be invoked after this operation
++ * returns.
++ */
++ void (*stopping)(struct task_struct *p, bool runnable);
++
++ /**
++ * @quiescent: A task is becoming not runnable on its associated CPU
++ * @p: task becoming not runnable
++ * @deq_flags: %SCX_DEQ_*
++ *
++ * See ->runnable() for explanation on the task state notifiers.
++ *
++ * @p is becoming quiescent on the CPU because it's
++ *
++ * - sleeping (%SCX_DEQ_SLEEP)
++ * - being moved to another CPU
++ * - being temporarily taken off the queue for an attribute change
++ * (%SCX_DEQ_SAVE)
++ *
++ * This and ->dequeue() are related but not coupled. This operation
++ * notifies @p's state transition and may not be preceded by ->dequeue()
++ * e.g. when @p is being dispatched to a remote CPU.
++ */
++ void (*quiescent)(struct task_struct *p, u64 deq_flags);
++
++ /**
++ * @yield: Yield CPU
++ * @from: yielding task
++ * @to: optional yield target task
++ *
++ * If @to is NULL, @from is yielding the CPU to other runnable tasks.
++ * The BPF scheduler should ensure that other available tasks are
++ * dispatched before the yielding task. Return value is ignored in this
++ * case.
++ *
++ * If @to is not-NULL, @from wants to yield the CPU to @to. If the bpf
++ * scheduler can implement the request, return %true; otherwise, %false.
++ */
++ bool (*yield)(struct task_struct *from, struct task_struct *to);
++
++ /**
++ * @core_sched_before: Task ordering for core-sched
++ * @a: task A
++ * @b: task B
++ *
++ * Used by core-sched to determine the ordering between two tasks. See
++ * Documentation/admin-guide/hw-vuln/core-scheduling.rst for details on
++ * core-sched.
++ *
++ * Both @a and @b are runnable and may or may not currently be queued on
++ * the BPF scheduler. Should return %true if @a should run before @b.
++ * %false if there's no required ordering or @b should run before @a.
++ *
++ * If not specified, the default is ordering them according to when they
++ * became runnable.
++ */
++ bool (*core_sched_before)(struct task_struct *a, struct task_struct *b);
++
++ /**
++ * @set_weight: Set task weight
++ * @p: task to set weight for
++ * @weight: new weight [1..10000]
++ *
++ * Update @p's weight to @weight.
++ */
++ void (*set_weight)(struct task_struct *p, u32 weight);
++
++ /**
++ * @set_cpumask: Set CPU affinity
++ * @p: task to set CPU affinity for
++ * @cpumask: cpumask of cpus that @p can run on
++ *
++ * Update @p's CPU affinity to @cpumask.
++ */
++ void (*set_cpumask)(struct task_struct *p,
++ const struct cpumask *cpumask);
++
++ /**
++ * @update_idle: Update the idle state of a CPU
++ * @cpu: CPU to update the idle state for
++ * @idle: whether entering or exiting the idle state
++ *
++ * This operation is called when @cpu enters or leaves the idle
++ * state. By default, implementing this operation disables the built-in
++ * idle CPU tracking and the following helpers become unavailable:
++ *
++ * - scx_bpf_select_cpu_dfl()
++ * - scx_bpf_select_cpu_and()
++ * - scx_bpf_test_and_clear_cpu_idle()
++ * - scx_bpf_pick_idle_cpu()
++ *
++ * The user also must implement ops.select_cpu() as the default
++ * implementation relies on scx_bpf_select_cpu_dfl().
++ *
++ * Specify the %SCX_OPS_KEEP_BUILTIN_IDLE flag to keep the built-in idle
++ * tracking.
++ */
++ void (*update_idle)(s32 cpu, bool idle);
++
++ /**
++ * @cpu_acquire: A CPU is becoming available to the BPF scheduler
++ * @cpu: The CPU being acquired by the BPF scheduler.
++ * @args: Acquire arguments, see the struct definition.
++ *
++ * A CPU that was previously released from the BPF scheduler is now once
++ * again under its control.
++ */
++ void (*cpu_acquire)(s32 cpu, struct scx_cpu_acquire_args *args);
++
++ /**
++ * @cpu_release: A CPU is taken away from the BPF scheduler
++ * @cpu: The CPU being released by the BPF scheduler.
++ * @args: Release arguments, see the struct definition.
++ *
++ * The specified CPU is no longer under the control of the BPF
++ * scheduler. This could be because it was preempted by a higher
++ * priority sched_class, though there may be other reasons as well. The
++ * caller should consult @args->reason to determine the cause.
++ */
++ void (*cpu_release)(s32 cpu, struct scx_cpu_release_args *args);
++
++ /**
++ * @init_task: Initialize a task to run in a BPF scheduler
++ * @p: task to initialize for BPF scheduling
++ * @args: init arguments, see the struct definition
++ *
++ * Either we're loading a BPF scheduler or a new task is being forked.
++ * Initialize @p for BPF scheduling. This operation may block and can
++ * be used for allocations, and is called exactly once for a task.
++ *
++ * Return 0 for success, -errno for failure. An error return while
++ * loading will abort loading of the BPF scheduler. During a fork, it
++ * will abort that specific fork.
++ */
++ s32 (*init_task)(struct task_struct *p, struct scx_init_task_args *args);
++
++ /**
++ * @exit_task: Exit a previously-running task from the system
++ * @p: task to exit
++ * @args: exit arguments, see the struct definition
++ *
++ * @p is exiting or the BPF scheduler is being unloaded. Perform any
++ * necessary cleanup for @p.
++ */
++ void (*exit_task)(struct task_struct *p, struct scx_exit_task_args *args);
++
++ /**
++ * @enable: Enable BPF scheduling for a task
++ * @p: task to enable BPF scheduling for
++ *
++ * Enable @p for BPF scheduling. enable() is called on @p any time it
++ * enters SCX, and is always paired with a matching disable().
++ */
++ void (*enable)(struct task_struct *p);
++
++ /**
++ * @disable: Disable BPF scheduling for a task
++ * @p: task to disable BPF scheduling for
++ *
++ * @p is exiting, leaving SCX or the BPF scheduler is being unloaded.
++ * Disable BPF scheduling for @p. A disable() call is always matched
++ * with a prior enable() call.
++ */
++ void (*disable)(struct task_struct *p);
++
++ /**
++ * @dump: Dump BPF scheduler state on error
++ * @ctx: debug dump context
++ *
++ * Use scx_bpf_dump() to generate BPF scheduler specific debug dump.
++ */
++ void (*dump)(struct scx_dump_ctx *ctx);
++
++ /**
++ * @dump_cpu: Dump BPF scheduler state for a CPU on error
++ * @ctx: debug dump context
++ * @cpu: CPU to generate debug dump for
++ * @idle: @cpu is currently idle without any runnable tasks
++ *
++ * Use scx_bpf_dump() to generate BPF scheduler specific debug dump for
++ * @cpu. If @idle is %true and this operation doesn't produce any
++ * output, @cpu is skipped for dump.
++ */
++ void (*dump_cpu)(struct scx_dump_ctx *ctx, s32 cpu, bool idle);
++
++ /**
++ * @dump_task: Dump BPF scheduler state for a runnable task on error
++ * @ctx: debug dump context
++ * @p: runnable task to generate debug dump for
++ *
++ * Use scx_bpf_dump() to generate BPF scheduler specific debug dump for
++ * @p.
++ */
++ void (*dump_task)(struct scx_dump_ctx *ctx, struct task_struct *p);
++
++#ifdef CONFIG_EXT_GROUP_SCHED
++ /**
++ * @cgroup_init: Initialize a cgroup
++ * @cgrp: cgroup being initialized
++ * @args: init arguments, see the struct definition
++ *
++ * Either the BPF scheduler is being loaded or @cgrp created, initialize
++ * @cgrp for sched_ext. This operation may block.
++ *
++ * Return 0 for success, -errno for failure. An error return while
++ * loading will abort loading of the BPF scheduler. During cgroup
++ * creation, it will abort the specific cgroup creation.
++ */
++ s32 (*cgroup_init)(struct cgroup *cgrp,
++ struct scx_cgroup_init_args *args);
++
++ /**
++ * @cgroup_exit: Exit a cgroup
++ * @cgrp: cgroup being exited
++ *
++ * Either the BPF scheduler is being unloaded or @cgrp destroyed, exit
++ * @cgrp for sched_ext. This operation may block.
++ */
++ void (*cgroup_exit)(struct cgroup *cgrp);
++
++ /**
++ * @cgroup_prep_move: Prepare a task to be moved to a different cgroup
++ * @p: task being moved
++ * @from: cgroup @p is being moved from
++ * @to: cgroup @p is being moved to
++ *
++ * Prepare @p for move from cgroup @from to @to. This operation may
++ * block and can be used for allocations.
++ *
++ * Return 0 for success, -errno for failure. An error return aborts the
++ * migration.
++ */
++ s32 (*cgroup_prep_move)(struct task_struct *p,
++ struct cgroup *from, struct cgroup *to);
++
++ /**
++ * @cgroup_move: Commit cgroup move
++ * @p: task being moved
++ * @from: cgroup @p is being moved from
++ * @to: cgroup @p is being moved to
++ *
++ * Commit the move. @p is dequeued during this operation.
++ */
++ void (*cgroup_move)(struct task_struct *p,
++ struct cgroup *from, struct cgroup *to);
++
++ /**
++ * @cgroup_cancel_move: Cancel cgroup move
++ * @p: task whose cgroup move is being canceled
++ * @from: cgroup @p was being moved from
++ * @to: cgroup @p was being moved to
++ *
++ * @p was cgroup_prep_move()'d but failed before reaching cgroup_move().
++ * Undo the preparation.
++ */
++ void (*cgroup_cancel_move)(struct task_struct *p,
++ struct cgroup *from, struct cgroup *to);
++
++ /**
++ * @cgroup_set_weight: A cgroup's weight is being changed
++ * @cgrp: cgroup whose weight is being updated
++ * @weight: new weight [1..10000]
++ *
++ * Update @cgrp's weight to @weight.
++ */
++ void (*cgroup_set_weight)(struct cgroup *cgrp, u32 weight);
++
++ /**
++ * @cgroup_set_bandwidth: A cgroup's bandwidth is being changed
++ * @cgrp: cgroup whose bandwidth is being updated
++ * @period_us: bandwidth control period
++ * @quota_us: bandwidth control quota
++ * @burst_us: bandwidth control burst
++ *
++ * Update @cgrp's bandwidth control parameters. This is from the cpu.max
++ * cgroup interface.
++ *
++ * @quota_us / @period_us determines the CPU bandwidth @cgrp is entitled
++ * to. For example, if @period_us is 1_000_000 and @quota_us is
++ * 2_500_000, @cgrp is entitled to 2.5 CPUs. @burst_us can be
++ * interpreted in the same fashion and specifies how much @cgrp can
++ * burst temporarily. The specific control mechanism and thus the
++ * interpretation of @period_us and burstiness is up to the BPF
++ * scheduler.
++ */
++ void (*cgroup_set_bandwidth)(struct cgroup *cgrp,
++ u64 period_us, u64 quota_us, u64 burst_us);
++
++#endif /* CONFIG_EXT_GROUP_SCHED */
++
++ /*
++ * All online ops must come before ops.cpu_online().
++ */
++
++ /**
++ * @cpu_online: A CPU became online
++ * @cpu: CPU which just came up
++ *
++ * @cpu just came online. @cpu will not call ops.enqueue() or
++ * ops.dispatch(), nor run tasks associated with other CPUs beforehand.
++ */
++ void (*cpu_online)(s32 cpu);
++
++ /**
++ * @cpu_offline: A CPU is going offline
++ * @cpu: CPU which is going offline
++ *
++ * @cpu is going offline. @cpu will not call ops.enqueue() or
++ * ops.dispatch(), nor run tasks associated with other CPUs afterwards.
++ */
++ void (*cpu_offline)(s32 cpu);
++
++ /*
++ * All CPU hotplug ops must come before ops.init().
++ */
++
++ /**
++ * @init: Initialize the BPF scheduler
++ */
++ s32 (*init)(void);
++
++ /**
++ * @exit: Clean up after the BPF scheduler
++ * @info: Exit info
++ *
++ * ops.exit() is also called on ops.init() failure, which is a bit
++ * unusual. This is to allow rich reporting through @info on how
++ * ops.init() failed.
++ */
++ void (*exit)(struct scx_exit_info *info);
++
++ /**
++ * @dispatch_max_batch: Max nr of tasks that dispatch() can dispatch
++ */
++ u32 dispatch_max_batch;
++
++ /**
++ * @flags: %SCX_OPS_* flags
++ */
++ u64 flags;
++
++ /**
++ * @timeout_ms: The maximum amount of time, in milliseconds, that a
++ * runnable task should be able to wait before being scheduled. The
++ * maximum timeout may not exceed the default timeout of 30 seconds.
++ *
++ * Defaults to the maximum allowed timeout value of 30 seconds.
++ */
++ u32 timeout_ms;
++
++ /**
++ * @exit_dump_len: scx_exit_info.dump buffer length. If 0, the default
++ * value of 32768 is used.
++ */
++ u32 exit_dump_len;
++
++ /**
++ * @hotplug_seq: A sequence number that may be set by the scheduler to
++ * detect when a hotplug event has occurred during the loading process.
++ * If 0, no detection occurs. Otherwise, the scheduler will fail to
++ * load if the sequence number does not match @scx_hotplug_seq on the
++ * enable path.
++ */
++ u64 hotplug_seq;
++
++ /**
++ * @name: BPF scheduler's name
++ *
++ * Must be a non-zero valid BPF object name including only isalnum(),
++ * '_' and '.' chars. Shows up in kernel.sched_ext_ops sysctl while the
++ * BPF scheduler is enabled.
++ */
++ char name[SCX_OPS_NAME_LEN];
++
++ /* internal use only, must be NULL */
++ void *priv;
++};
++
++enum scx_opi {
++ SCX_OPI_BEGIN = 0,
++ SCX_OPI_NORMAL_BEGIN = 0,
++ SCX_OPI_NORMAL_END = SCX_OP_IDX(cpu_online),
++ SCX_OPI_CPU_HOTPLUG_BEGIN = SCX_OP_IDX(cpu_online),
++ SCX_OPI_CPU_HOTPLUG_END = SCX_OP_IDX(init),
++ SCX_OPI_END = SCX_OP_IDX(init),
++};
++
++/*
++ * Collection of event counters. Event types are placed in descending order.
++ */
++struct scx_event_stats {
++ /*
++ * If ops.select_cpu() returns a CPU which can't be used by the task,
++ * the core scheduler code silently picks a fallback CPU.
++ */
++ s64 SCX_EV_SELECT_CPU_FALLBACK;
++
++ /*
++ * When dispatching to a local DSQ, the CPU may have gone offline in
++ * the meantime. In this case, the task is bounced to the global DSQ.
++ */
++ s64 SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE;
++
++ /*
++ * If SCX_OPS_ENQ_LAST is not set, the number of times that a task
++ * continued to run because there were no other tasks on the CPU.
++ */
++ s64 SCX_EV_DISPATCH_KEEP_LAST;
++
++ /*
++ * If SCX_OPS_ENQ_EXITING is not set, the number of times that a task
++ * is dispatched to a local DSQ when exiting.
++ */
++ s64 SCX_EV_ENQ_SKIP_EXITING;
++
++ /*
++ * If SCX_OPS_ENQ_MIGRATION_DISABLED is not set, the number of times a
++ * migration disabled task skips ops.enqueue() and is dispatched to its
++ * local DSQ.
++ */
++ s64 SCX_EV_ENQ_SKIP_MIGRATION_DISABLED;
++
++ /*
++ * Total number of times a task's time slice was refilled with the
++ * default value (SCX_SLICE_DFL).
++ */
++ s64 SCX_EV_REFILL_SLICE_DFL;
++
++ /*
++ * The total duration of bypass modes in nanoseconds.
++ */
++ s64 SCX_EV_BYPASS_DURATION;
++
++ /*
++ * The number of tasks dispatched in the bypassing mode.
++ */
++ s64 SCX_EV_BYPASS_DISPATCH;
++
++ /*
++ * The number of times the bypassing mode has been activated.
++ */
++ s64 SCX_EV_BYPASS_ACTIVATE;
++};
++
++struct scx_sched {
++ struct sched_ext_ops ops;
++ DECLARE_BITMAP(has_op, SCX_OPI_END);
++
++ /*
++ * Dispatch queues.
++ *
++ * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability.
++ * This is to avoid live-locking in bypass mode where all tasks are
++ * dispatched to %SCX_DSQ_GLOBAL and all CPUs consume from it. If
++ * per-node split isn't sufficient, it can be further split.
++ */
++ struct rhashtable dsq_hash;
++ struct scx_dispatch_q **global_dsqs;
++
++ /*
++ * The event counters are in a per-CPU variable to minimize the
++ * accounting overhead. A system-wide view on the event counter is
++ * constructed when requested by scx_bpf_events().
++ */
++ struct scx_event_stats __percpu *event_stats_cpu;
++
++ bool warned_zero_slice;
++
++ atomic_t exit_kind;
++ struct scx_exit_info *exit_info;
++
++ struct kobject kobj;
++
++ struct kthread_worker *helper;
++ struct irq_work error_irq_work;
++ struct kthread_work disable_work;
++ struct rcu_work rcu_work;
++};
++
++enum scx_wake_flags {
++ /* expose select WF_* flags as enums */
++ SCX_WAKE_FORK = WF_FORK,
++ SCX_WAKE_TTWU = WF_TTWU,
++ SCX_WAKE_SYNC = WF_SYNC,
++};
++
++enum scx_enq_flags {
++ /* expose select ENQUEUE_* flags as enums */
++ SCX_ENQ_WAKEUP = ENQUEUE_WAKEUP,
++ SCX_ENQ_HEAD = ENQUEUE_HEAD,
++ SCX_ENQ_CPU_SELECTED = ENQUEUE_RQ_SELECTED,
++
++ /* high 32bits are SCX specific */
++
++ /*
++ * Set the following to trigger preemption when calling
++ * scx_bpf_dsq_insert() with a local dsq as the target. The slice of the
++ * current task is cleared to zero and the CPU is kicked into the
++ * scheduling path. Implies %SCX_ENQ_HEAD.
++ */
++ SCX_ENQ_PREEMPT = 1LLU << 32,
++
++ /*
++ * The task being enqueued was previously enqueued on the current CPU's
++ * %SCX_DSQ_LOCAL, but was removed from it in a call to the
++ * scx_bpf_reenqueue_local() kfunc. If scx_bpf_reenqueue_local() was
++ * invoked in a ->cpu_release() callback, and the task is again
++ * dispatched back to %SCX_DSQ_LOCAL by this current ->enqueue(), the
++ * task will not be scheduled on the CPU until at least the next invocation
++ * of the ->cpu_acquire() callback.
++ */
++ SCX_ENQ_REENQ = 1LLU << 40,
++
++ /*
++ * The task being enqueued is the only task available for the cpu. By
++ * default, ext core keeps executing such tasks but when
++ * %SCX_OPS_ENQ_LAST is specified, they're ops.enqueue()'d with the
++ * %SCX_ENQ_LAST flag set.
++ *
++ * The BPF scheduler is responsible for triggering a follow-up
++ * scheduling event. Otherwise, execution may stall.
++ */
++ SCX_ENQ_LAST = 1LLU << 41,
++
++ /* high 8 bits are internal */
++ __SCX_ENQ_INTERNAL_MASK = 0xffLLU << 56,
++
++ SCX_ENQ_CLEAR_OPSS = 1LLU << 56,
++ SCX_ENQ_DSQ_PRIQ = 1LLU << 57,
++};
++
++enum scx_deq_flags {
++ /* expose select DEQUEUE_* flags as enums */
++ SCX_DEQ_SLEEP = DEQUEUE_SLEEP,
++
++ /* high 32bits are SCX specific */
++
++ /*
++ * The generic core-sched layer decided to execute the task even though
++ * it hasn't been dispatched yet. Dequeue from the BPF side.
++ */
++ SCX_DEQ_CORE_SCHED_EXEC = 1LLU << 32,
++};
++
++enum scx_pick_idle_cpu_flags {
++ SCX_PICK_IDLE_CORE = 1LLU << 0, /* pick a CPU whose SMT siblings are also idle */
++ SCX_PICK_IDLE_IN_NODE = 1LLU << 1, /* pick a CPU in the same target NUMA node */
++};
++
++enum scx_kick_flags {
++ /*
++ * Kick the target CPU if idle. Guarantees that the target CPU goes
++ * through at least one full scheduling cycle before going idle. If the
++ * target CPU can be determined to be currently not idle and going to go
++ * through a scheduling cycle before going idle, noop.
++ */
++ SCX_KICK_IDLE = 1LLU << 0,
++
++ /*
++ * Preempt the current task and execute the dispatch path. If the
++ * current task of the target CPU is an SCX task, its ->scx.slice is
++ * cleared to zero before the scheduling path is invoked so that the
++ * task expires and the dispatch path is invoked.
++ */
++ SCX_KICK_PREEMPT = 1LLU << 1,
++
++ /*
++ * Wait for the CPU to be rescheduled. The scx_bpf_kick_cpu() call will
++ * return after the target CPU finishes picking the next task.
++ */
++ SCX_KICK_WAIT = 1LLU << 2,
++};
++
++enum scx_tg_flags {
++ SCX_TG_ONLINE = 1U << 0,
++ SCX_TG_INITED = 1U << 1,
++};
++
++enum scx_enable_state {
++ SCX_ENABLING,
++ SCX_ENABLED,
++ SCX_DISABLING,
++ SCX_DISABLED,
++};
++
++static const char *scx_enable_state_str[] = {
++ [SCX_ENABLING] = "enabling",
++ [SCX_ENABLED] = "enabled",
++ [SCX_DISABLING] = "disabling",
++ [SCX_DISABLED] = "disabled",
++};
++
++/*
++ * sched_ext_entity->ops_state
++ *
++ * Used to track the task ownership between the SCX core and the BPF scheduler.
++ * State transitions look as follows:
++ *
++ * NONE -> QUEUEING -> QUEUED -> DISPATCHING
++ * ^ | |
++ * | v v
++ * \-------------------------------/
++ *
++ * QUEUEING and DISPATCHING states can be waited upon. See wait_ops_state() call
++ * sites for explanations on the conditions being waited upon and why they are
++ * safe. Transitions out of them into NONE or QUEUED must store_release and the
++ * waiters should load_acquire.
++ *
++ * Tracking scx_ops_state enables sched_ext core to reliably determine whether
++ * any given task can be dispatched by the BPF scheduler at all times and thus
++ * relaxes the requirements on the BPF scheduler. This allows the BPF scheduler
++ * to try to dispatch any task anytime regardless of its state as the SCX core
++ * can safely reject invalid dispatches.
++ */
++enum scx_ops_state {
++ SCX_OPSS_NONE, /* owned by the SCX core */
++ SCX_OPSS_QUEUEING, /* in transit to the BPF scheduler */
++ SCX_OPSS_QUEUED, /* owned by the BPF scheduler */
++ SCX_OPSS_DISPATCHING, /* in transit back to the SCX core */
++
++ /*
++ * QSEQ brands each QUEUED instance so that, when dispatch races
++ * dequeue/requeue, the dispatcher can tell whether it still has a claim
++ * on the task being dispatched.
++ *
++ * As some 32bit archs can't do 64bit store_release/load_acquire,
++ * p->scx.ops_state is atomic_long_t which leaves 30 bits for QSEQ on
++ * 32bit machines. The dispatch race window QSEQ protects is very narrow
++ * and runs with IRQ disabled. 30 bits should be sufficient.
++ */
++ SCX_OPSS_QSEQ_SHIFT = 2,
++};
++
++/* Use macros to ensure that the type is unsigned long for the masks */
++#define SCX_OPSS_STATE_MASK ((1LU << SCX_OPSS_QSEQ_SHIFT) - 1)
++#define SCX_OPSS_QSEQ_MASK (~SCX_OPSS_STATE_MASK)
++
++DECLARE_PER_CPU(struct rq *, scx_locked_rq_state);
++
++/*
++ * Return the rq currently locked from an scx callback, or NULL if no rq is
++ * locked.
++ */
++static inline struct rq *scx_locked_rq(void)
++{
++ return __this_cpu_read(scx_locked_rq_state);
++}
++
++static inline bool scx_kf_allowed_if_unlocked(void)
++{
++ return !current->scx.kf_mask;
++}
++
++static inline bool scx_rq_bypassing(struct rq *rq)
++{
++ return unlikely(rq->scx.flags & SCX_RQ_BYPASSING);
++}
+--
+2.51.0
+
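+As a concrete illustration of the struct sched_ext_ops callbacks documented
+above, the following is a minimal sketch of how a BPF scheduler might wire
+up ops.init(), ops.enqueue() and ops.dispatch(). It is illustrative only:
+it assumes the helper macros and kfuncs shipped with the in-tree
+tools/sched_ext headers (BPF_STRUCT_OPS(), SCX_OPS_DEFINE(),
+scx_bpf_create_dsq(), scx_bpf_dsq_insert(), scx_bpf_dsq_move_to_local()),
+and the DSQ id and identifiers are made up, not part of the patch.
+
+	/* sketch_sched.bpf.c - illustrative, untested */
+	#include <scx/common.bpf.h>
+
+	char _license[] SEC("license") = "GPL";
+
+	#define SHARED_DSQ_ID	0	/* arbitrary user DSQ id */
+
+	/* Create the shared DSQ when the scheduler is loaded. */
+	s32 BPF_STRUCT_OPS_SLEEPABLE(sketch_init)
+	{
+		return scx_bpf_create_dsq(SHARED_DSQ_ID, -1);
+	}
+
+	/* Queue every runnable task on the shared DSQ with the default slice. */
+	void BPF_STRUCT_OPS(sketch_enqueue, struct task_struct *p, u64 enq_flags)
+	{
+		scx_bpf_dsq_insert(p, SHARED_DSQ_ID, SCX_SLICE_DFL, enq_flags);
+	}
+
+	/* When a CPU's local DSQ runs dry, pull the next task from the shared DSQ. */
+	void BPF_STRUCT_OPS(sketch_dispatch, s32 cpu, struct task_struct *prev)
+	{
+		scx_bpf_dsq_move_to_local(SHARED_DSQ_ID);
+	}
+
+	SCX_OPS_DEFINE(sketch_ops,
+		       .init		= (void *)sketch_init,
+		       .enqueue		= (void *)sketch_enqueue,
+		       .dispatch	= (void *)sketch_dispatch,
+		       .name		= "sketch");
+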
--- /dev/null
+From d30e5472caf956fd0d6267d20b2c9f45871ae70a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Sep 2025 11:33:28 -1000
+Subject: sched_ext: Put event_stats_cpu in struct scx_sched_pcpu
+
+From: Tejun Heo <tj@kernel.org>
+
+[ Upstream commit bcb7c2305682c77a8bfdbfe37106b314ac10110f ]
+
+scx_sched.event_stats_cpu holds the percpu counters that are used to track
+stats. Introduce struct scx_sched_pcpu and move the counters inside. This
+will ease adding more per-cpu fields. No functional changes.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Acked-by: Andrea Righi <arighi@nvidia.com>
+Stable-dep-of: efeeaac9ae97 ("sched_ext: Sync error_irq_work before freeing scx_sched")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/ext.c | 18 +++++++++---------
+ kernel/sched/ext_internal.h | 17 ++++++++++-------
+ 2 files changed, 19 insertions(+), 16 deletions(-)
+
+diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
+index 8ecde1abb4e28..46029050b170f 100644
+--- a/kernel/sched/ext.c
++++ b/kernel/sched/ext.c
+@@ -630,7 +630,7 @@ static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter)
+ * This can be used when preemption is not disabled.
+ */
+ #define scx_add_event(sch, name, cnt) do { \
+- this_cpu_add((sch)->event_stats_cpu->name, (cnt)); \
++ this_cpu_add((sch)->pcpu->event_stats.name, (cnt)); \
+ trace_sched_ext_event(#name, (cnt)); \
+ } while(0)
+
+@@ -643,7 +643,7 @@ static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter)
+ * This should be used only when preemption is disabled.
+ */
+ #define __scx_add_event(sch, name, cnt) do { \
+- __this_cpu_add((sch)->event_stats_cpu->name, (cnt)); \
++ __this_cpu_add((sch)->pcpu->event_stats.name, (cnt)); \
+ trace_sched_ext_event(#name, cnt); \
+ } while(0)
+
+@@ -3538,7 +3538,7 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
+ int node;
+
+ kthread_stop(sch->helper->task);
+- free_percpu(sch->event_stats_cpu);
++ free_percpu(sch->pcpu);
+
+ for_each_node_state(node, N_POSSIBLE)
+ kfree(sch->global_dsqs[node]);
+@@ -4439,13 +4439,13 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
+ sch->global_dsqs[node] = dsq;
+ }
+
+- sch->event_stats_cpu = alloc_percpu(struct scx_event_stats);
+- if (!sch->event_stats_cpu)
++ sch->pcpu = alloc_percpu(struct scx_sched_pcpu);
++ if (!sch->pcpu)
+ goto err_free_gdsqs;
+
+ sch->helper = kthread_run_worker(0, "sched_ext_helper");
+ if (!sch->helper)
+- goto err_free_event_stats;
++ goto err_free_pcpu;
+ sched_set_fifo(sch->helper->task);
+
+ atomic_set(&sch->exit_kind, SCX_EXIT_NONE);
+@@ -4463,8 +4463,8 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
+
+ err_stop_helper:
+ kthread_stop(sch->helper->task);
+-err_free_event_stats:
+- free_percpu(sch->event_stats_cpu);
++err_free_pcpu:
++ free_percpu(sch->pcpu);
+ err_free_gdsqs:
+ for_each_node_state(node, N_POSSIBLE)
+ kfree(sch->global_dsqs[node]);
+@@ -6490,7 +6490,7 @@ static void scx_read_events(struct scx_sched *sch, struct scx_event_stats *event
+ /* Aggregate per-CPU event counters into @events. */
+ memset(events, 0, sizeof(*events));
+ for_each_possible_cpu(cpu) {
+- e_cpu = per_cpu_ptr(sch->event_stats_cpu, cpu);
++ e_cpu = &per_cpu_ptr(sch->pcpu, cpu)->event_stats;
+ scx_agg_event(events, e_cpu, SCX_EV_SELECT_CPU_FALLBACK);
+ scx_agg_event(events, e_cpu, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE);
+ scx_agg_event(events, e_cpu, SCX_EV_DISPATCH_KEEP_LAST);
+diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
+index 76690ede8700f..af4c054fb6f85 100644
+--- a/kernel/sched/ext_internal.h
++++ b/kernel/sched/ext_internal.h
+@@ -846,6 +846,15 @@ struct scx_event_stats {
+ s64 SCX_EV_BYPASS_ACTIVATE;
+ };
+
++struct scx_sched_pcpu {
++ /*
++ * The event counters are in a per-CPU variable to minimize the
++ * accounting overhead. A system-wide view on the event counter is
++ * constructed when requested by scx_bpf_events().
++ */
++ struct scx_event_stats event_stats;
++};
++
+ struct scx_sched {
+ struct sched_ext_ops ops;
+ DECLARE_BITMAP(has_op, SCX_OPI_END);
+@@ -860,13 +869,7 @@ struct scx_sched {
+ */
+ struct rhashtable dsq_hash;
+ struct scx_dispatch_q **global_dsqs;
+-
+- /*
+- * The event counters are in a per-CPU variable to minimize the
+- * accounting overhead. A system-wide view on the event counter is
+- * constructed when requested by scx_bpf_events().
+- */
+- struct scx_event_stats __percpu *event_stats_cpu;
++ struct scx_sched_pcpu __percpu *pcpu;
+
+ bool warned_zero_slice;
+
+--
+2.51.0
+
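+As the changelog notes, the wrapper struct exists to make further per-CPU
+fields cheap to add. A hypothetical sketch (field and function names are
+made up, not part of this patch) of what such an addition and its
+aggregation would look like, following the same this_cpu/per_cpu_ptr
+pattern as the event counters:
+
+	struct scx_sched_pcpu {
+		struct scx_event_stats	event_stats;
+		u64			nr_local_dispatches;	/* hypothetical new field */
+	};
+
+	/* called from a preemption-disabled dispatch path */
+	static void account_local_dispatch(struct scx_sched *sch)
+	{
+		__this_cpu_inc(sch->pcpu->nr_local_dispatches);
+	}
+
+	/* system-wide view, mirroring scx_read_events() */
+	static u64 read_local_dispatches(struct scx_sched *sch)
+	{
+		u64 total = 0;
+		int cpu;
+
+		for_each_possible_cpu(cpu)
+			total += per_cpu_ptr(sch->pcpu, cpu)->nr_local_dispatches;
+		return total;
+	}
+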
--- /dev/null
+From 03175244f8b0c9d1e7bae5998f3bcfb7f77c59c7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Oct 2025 13:56:23 -1000
+Subject: sched_ext: Sync error_irq_work before freeing scx_sched
+
+From: Tejun Heo <tj@kernel.org>
+
+[ Upstream commit efeeaac9ae9763f9c953e69633c86bc3031e39b5 ]
+
+By the time scx_sched_free_rcu_work() runs, the scx_sched is no longer
+reachable. However, a previously queued error_irq_work may still be pending or
+running. Ensure it completes before proceeding with teardown.
+
+Fixes: bff3b5aec1b7 ("sched_ext: Move disable machinery into scx_sched")
+Acked-by: Andrea Righi <arighi@nvidia.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/ext.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
+index 46029050b170f..f89894476e51f 100644
+--- a/kernel/sched/ext.c
++++ b/kernel/sched/ext.c
+@@ -3537,7 +3537,9 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
+ struct scx_dispatch_q *dsq;
+ int node;
+
++ irq_work_sync(&sch->error_irq_work);
+ kthread_stop(sch->helper->task);
++
+ free_percpu(sch->pcpu);
+
+ for_each_node_state(node, N_POSSIBLE)
+--
+2.51.0
+
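+The bug class here is generic: an object that embeds an irq_work must sync
+that work before the object is freed, or the handler can run on freed
+memory. A schematic example with made-up names (not the scx_sched code
+itself):
+
+	#include <linux/irq_work.h>
+	#include <linux/slab.h>
+	#include <linux/printk.h>
+
+	struct foo {
+		struct irq_work	error_work;
+		/* ... */
+	};
+
+	static void foo_error_fn(struct irq_work *w)
+	{
+		struct foo *f = container_of(w, struct foo, error_work);
+
+		pr_err("foo error on %p\n", f);	/* report, schedule teardown, ... */
+	}
+
+	static struct foo *foo_alloc(void)
+	{
+		struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);
+
+		if (f)
+			init_irq_work(&f->error_work, foo_error_fn);
+		return f;
+	}
+
+	static void foo_free(struct foo *f)
+	{
+		/* wait for a pending or running handler before freeing */
+		irq_work_sync(&f->error_work);
+		kfree(f);
+	}
+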
--- /dev/null
+From fe915f3331ace294cf2bb31d41fdcb2842b01530 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 20 Jul 2025 13:21:30 +0200
+Subject: seccomp: passthrough uprobe systemcall without filtering
+
+From: Jiri Olsa <jolsa@kernel.org>
+
+[ Upstream commit 89d1d8434d246c96309a6068dfcf9e36dc61227b ]
+
+Add uprobe as another exception to the seccomp filter, alongside the
+uretprobe syscall.
+
+Like the uretprobe, the uprobe syscall is installed by the kernel as a
+replacement for the breakpoint exception, is limited to the x86_64 arch,
+and isn't expected to ever be supported on i386.
+
+Signed-off-by: Jiri Olsa <jolsa@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Kees Cook <kees@kernel.org>
+Link: https://lore.kernel.org/r/20250720112133.244369-21-jolsa@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/seccomp.c | 32 +++++++++++++++++++++++++-------
+ 1 file changed, 25 insertions(+), 7 deletions(-)
+
+diff --git a/kernel/seccomp.c b/kernel/seccomp.c
+index 3bbfba30a777a..25f62867a16d9 100644
+--- a/kernel/seccomp.c
++++ b/kernel/seccomp.c
+@@ -741,6 +741,26 @@ seccomp_prepare_user_filter(const char __user *user_filter)
+ }
+
+ #ifdef SECCOMP_ARCH_NATIVE
++static bool seccomp_uprobe_exception(struct seccomp_data *sd)
++{
++#if defined __NR_uretprobe || defined __NR_uprobe
++#ifdef SECCOMP_ARCH_COMPAT
++ if (sd->arch == SECCOMP_ARCH_NATIVE)
++#endif
++ {
++#ifdef __NR_uretprobe
++ if (sd->nr == __NR_uretprobe)
++ return true;
++#endif
++#ifdef __NR_uprobe
++ if (sd->nr == __NR_uprobe)
++ return true;
++#endif
++ }
++#endif
++ return false;
++}
++
+ /**
+ * seccomp_is_const_allow - check if filter is constant allow with given data
+ * @fprog: The BPF programs
+@@ -758,13 +778,8 @@ static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
+ return false;
+
+ /* Our single exception to filtering. */
+-#ifdef __NR_uretprobe
+-#ifdef SECCOMP_ARCH_COMPAT
+- if (sd->arch == SECCOMP_ARCH_NATIVE)
+-#endif
+- if (sd->nr == __NR_uretprobe)
+- return true;
+-#endif
++ if (seccomp_uprobe_exception(sd))
++ return true;
+
+ for (pc = 0; pc < fprog->len; pc++) {
+ struct sock_filter *insn = &fprog->filter[pc];
+@@ -1042,6 +1057,9 @@ static const int mode1_syscalls[] = {
+ __NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
+ #ifdef __NR_uretprobe
+ __NR_uretprobe,
++#endif
++#ifdef __NR_uprobe
++ __NR_uprobe,
+ #endif
+ -1, /* negative terminated */
+ };
+--
+2.51.0
+
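+To see the effect from userspace: a task under SECCOMP_MODE_STRICT may
+only issue read/write/exit/sigreturn, yet uprobes attached to it keep
+working because the kernel-installed uprobe (and uretprobe) syscall is on
+the mode1_syscalls allow list above. The snippet below is an illustrative
+assumption about how one might observe this, not part of the patch:
+
+	#include <stdio.h>
+	#include <unistd.h>
+	#include <sys/prctl.h>
+	#include <sys/syscall.h>
+	#include <linux/seccomp.h>
+
+	int main(void)
+	{
+		if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0)) {
+			perror("prctl");
+			return 1;
+		}
+		/*
+		 * Place a uprobe on a function of this binary while it runs:
+		 * before this patch the injected uprobe syscall would have
+		 * killed the task, now it is passed through.
+		 */
+		write(1, "running under strict seccomp\n", 29);
+		syscall(SYS_exit, 0);	/* exit_group() is not on the strict allow list */
+		return 0;
+	}
+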
--- /dev/null
+sched_ext-move-internal-type-and-accessor-definition.patch
+sched_ext-put-event_stats_cpu-in-struct-scx_sched_pc.patch
+sched_ext-sync-error_irq_work-before-freeing-scx_sch.patch
+timekeeping-fix-aux-clocks-sysfs-initialization-loop.patch
+x86-bugs-report-correct-retbleed-mitigation-status.patch
+x86-bugs-qualify-retbleed_intel_msg.patch
+genirq-chip-add-buslock-back-in-to-irq_set_handler.patch
+genirq-manage-add-buslock-back-in-to-__disable_irq_n.patch
+genirq-manage-add-buslock-back-in-to-enable_irq.patch
+audit-record-fanotify-event-regardless-of-presence-o.patch
+edac-ie31200-add-two-more-intel-alder-lake-s-socs-fo.patch
+perf-x86-intel-add-icl_fixed_0_adaptive-bit-into-int.patch
+perf-use-current-flags-pf_kthread-pf_user_worker-ins.patch
+perf-have-get_perf_callchain-return-null-if-crosstas.patch
+perf-skip-user-unwind-if-the-task-is-a-kernel-thread.patch
+edac-fix-wrong-executable-file-modes-for-c-source-fi.patch
+seccomp-passthrough-uprobe-systemcall-without-filter.patch
+sched_ext-keep-bypass-on-between-enable-failure-and-.patch
+x86-bugs-add-attack-vector-controls-for-vmscape.patch
+sched-fair-update_cfs_group-for-throttled-cfs_rqs.patch
+x86-bugs-fix-reporting-of-lfence-retpoline.patch
+edac-mc_sysfs-increase-legacy-channel-support-to-16.patch
+cpuset-use-new-excpus-for-nocpu-error-check-when-ena.patch
+btrfs-abort-transaction-on-specific-error-places-whe.patch
+btrfs-abort-transaction-in-the-process_one_buffer-lo.patch
+btrfs-zoned-return-error-from-btrfs_zone_finish_endi.patch
+btrfs-zoned-refine-extent-allocator-hint-selection.patch
+btrfs-scrub-replace-max_t-min_t-with-clamp-in-scrub_.patch
+btrfs-always-drop-log-root-tree-reference-in-btrfs_r.patch
+btrfs-use-level-argument-in-log-tree-walk-callback-r.patch
+btrfs-abort-transaction-if-we-fail-to-update-inode-i.patch
+btrfs-tree-checker-add-inode-extref-checks.patch
+btrfs-use-smp_mb__after_atomic-when-forcing-cow-in-c.patch
+sched_ext-make-qmap-dump-operation-non-destructive.patch
+arch-add-the-macro-compile_offsets-to-all-the-asm-of.patch
--- /dev/null
+From 03823cc4dcccf525a9b20bef586082b1dcc89adf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Oct 2025 14:17:53 +0800
+Subject: timekeeping: Fix aux clocks sysfs initialization loop bound
+
+From: Haofeng Li <lihaofeng@kylinos.cn>
+
+[ Upstream commit 39a9ed0fb6dac58547afdf9b6cb032d326a3698f ]
+
+The loop in tk_aux_sysfs_init() uses `i <= MAX_AUX_CLOCKS` as the
+termination condition, which results in 9 iterations (i=0 to 8) when
+MAX_AUX_CLOCKS is defined as 8. However, the kernel is designed to support
+only up to 8 auxiliary clocks.
+
+This off-by-one error causes the creation of a 9th sysfs entry that exceeds
+the intended auxiliary clock range.
+
+Fix the loop bound to use `i < MAX_AUX_CLOCKS` to ensure exactly 8
+auxiliary clock entries are created, matching the design specification.
+
+Fixes: 7b95663a3d96 ("timekeeping: Provide interface to control auxiliary clocks")
+Signed-off-by: Haofeng Li <lihaofeng@kylinos.cn>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://patch.msgid.link/tencent_2376993D9FC06A3616A4F981B3DE1C599607@qq.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/time/timekeeping.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
+index b6974fce800cd..3a4d3b2e3f740 100644
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -3070,7 +3070,7 @@ static int __init tk_aux_sysfs_init(void)
+ return -ENOMEM;
+ }
+
+- for (int i = 0; i <= MAX_AUX_CLOCKS; i++) {
++ for (int i = 0; i < MAX_AUX_CLOCKS; i++) {
+ char id[2] = { [0] = '0' + i, };
+ struct kobject *clk = kobject_create_and_add(id, auxo);
+
+--
+2.51.0
+
--- /dev/null
+From 60bd79a607d557eed0d51b5455016a10ea60aafc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Sep 2025 10:24:28 -0500
+Subject: x86/bugs: Add attack vector controls for VMSCAPE
+
+From: David Kaplan <david.kaplan@amd.com>
+
+[ Upstream commit 5799d5d8a6c877f03ad5b5a640977053be45059a ]
+
+Use attack vector controls to select whether VMSCAPE requires mitigation,
+similar to other bugs.
+
+Signed-off-by: David Kaplan <david.kaplan@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../admin-guide/hw-vuln/attack_vector_controls.rst | 1 +
+ arch/x86/kernel/cpu/bugs.c | 14 ++++++++++----
+ 2 files changed, 11 insertions(+), 4 deletions(-)
+
+diff --git a/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst b/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst
+index 5964901d66e31..d0bdbd81dcf9f 100644
+--- a/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst
++++ b/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst
+@@ -218,6 +218,7 @@ SRSO X X X X
+ SSB X
+ TAA X X X X * (Note 2)
+ TSA X X X X
++VMSCAPE X
+ =============== ============== ============ ============= ============== ============ ========
+
+ Notes:
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 9750ce448e626..c6bb8e76eb984 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -434,6 +434,9 @@ static bool __init should_mitigate_vuln(unsigned int bug)
+ case X86_BUG_SPEC_STORE_BYPASS:
+ return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER);
+
++ case X86_BUG_VMSCAPE:
++ return cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST);
++
+ default:
+ WARN(1, "Unknown bug %x\n", bug);
+ return false;
+@@ -3308,15 +3311,18 @@ early_param("vmscape", vmscape_parse_cmdline);
+
+ static void __init vmscape_select_mitigation(void)
+ {
+- if (cpu_mitigations_off() ||
+- !boot_cpu_has_bug(X86_BUG_VMSCAPE) ||
++ if (!boot_cpu_has_bug(X86_BUG_VMSCAPE) ||
+ !boot_cpu_has(X86_FEATURE_IBPB)) {
+ vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
+ return;
+ }
+
+- if (vmscape_mitigation == VMSCAPE_MITIGATION_AUTO)
+- vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER;
++ if (vmscape_mitigation == VMSCAPE_MITIGATION_AUTO) {
++ if (should_mitigate_vuln(X86_BUG_VMSCAPE))
++ vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER;
++ else
++ vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
++ }
+ }
+
+ static void __init vmscape_update_mitigation(void)
+--
+2.51.0
+
--- /dev/null
+From 705fc41b44d203b1500a524f3fb04ba1c63cd931 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Sep 2025 08:47:05 -0500
+Subject: x86/bugs: Fix reporting of LFENCE retpoline
+
+From: David Kaplan <david.kaplan@amd.com>
+
+[ Upstream commit d1cc1baef67ac6c09b74629ca053bf3fb812f7dc ]
+
+The LFENCE retpoline mitigation is not secure but the kernel prints
+inconsistent messages about this fact. The dmesg log says 'Mitigation:
+LFENCE', implying the system is mitigated. But sysfs reports 'Vulnerable:
+LFENCE' implying the system (correctly) is not mitigated.
+
+Fix this by printing a consistent 'Vulnerable: LFENCE' string everywhere
+when this mitigation is selected.
+
+Signed-off-by: David Kaplan <david.kaplan@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/20250915134706.3201818-1-david.kaplan@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index c6bb8e76eb984..26ece97011fd7 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -2052,7 +2052,7 @@ static void __init spectre_v2_user_apply_mitigation(void)
+ static const char * const spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+ [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines",
+- [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE",
++ [SPECTRE_V2_LFENCE] = "Vulnerable: LFENCE",
+ [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced / Automatic IBRS",
+ [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced / Automatic IBRS + LFENCE",
+ [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced / Automatic IBRS + Retpolines",
+@@ -3636,9 +3636,6 @@ static const char *spectre_bhi_state(void)
+
+ static ssize_t spectre_v2_show_state(char *buf)
+ {
+- if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+- return sysfs_emit(buf, "Vulnerable: LFENCE\n");
+-
+ if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+ return sysfs_emit(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
+
+--
+2.51.0
+
--- /dev/null
+From 9965d529966df68e304d4db15a0da58fce023b71 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Oct 2025 12:19:36 -0500
+Subject: x86/bugs: Qualify RETBLEED_INTEL_MSG
+
+From: David Kaplan <david.kaplan@amd.com>
+
+[ Upstream commit 204ced4108f5d38f6804968fd9543cc69c3f8da6 ]
+
+When retbleed mitigation is disabled, the kernel already prints an info
+message that the system is vulnerable. Recent code restructuring also
+inadvertently led to RETBLEED_INTEL_MSG being printed as an error, which is
+unnecessary as retbleed mitigation was already explicitly disabled (by config
+option, cmdline, etc.).
+
+Qualify this print statement so the warning is not printed unless an actual
+retbleed mitigation was selected and is being disabled due to incompatibility
+with spectre_v2.
+
+Fixes: e3b78a7ad5ea ("x86/bugs: Restructure retbleed mitigation")
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220624
+Signed-off-by: David Kaplan <david.kaplan@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://patch.msgid.link/20251003171936.155391-1-david.kaplan@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index bf79ff6a1f662..9750ce448e626 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1461,7 +1461,9 @@ static void __init retbleed_update_mitigation(void)
+ break;
+ default:
+ if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) {
+- pr_err(RETBLEED_INTEL_MSG);
++ if (retbleed_mitigation != RETBLEED_MITIGATION_NONE)
++ pr_err(RETBLEED_INTEL_MSG);
++
+ retbleed_mitigation = RETBLEED_MITIGATION_NONE;
+ }
+ }
+--
+2.51.0
+
--- /dev/null
+From 029a4346ea7f82d5882b314eca129b1591db28b4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Sep 2025 08:47:06 -0500
+Subject: x86/bugs: Report correct retbleed mitigation status
+
+From: David Kaplan <david.kaplan@amd.com>
+
+[ Upstream commit 930f2361fe542a00de9ce6070b1b6edb976f1165 ]
+
+On Intel CPUs, the default retbleed mitigation is IBRS/eIBRS but this
+requires that a similar spectre_v2 mitigation is applied. If the user
+selects a different spectre_v2 mitigation (like spectre_v2=retpoline) a
+warning is printed but sysfs will still report 'Mitigation: IBRS' or
+'Mitigation: Enhanced IBRS'. This is incorrect because retbleed is not
+mitigated, and IBRS is not actually set.
+
+Fix this by choosing RETBLEED_MITIGATION_NONE in this scenario so the
+kernel correctly reports the system as vulnerable to retbleed.
+
+Signed-off-by: David Kaplan <david.kaplan@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/20250915134706.3201818-1-david.kaplan@amd.com
+Stable-dep-of: 204ced4108f5 ("x86/bugs: Qualify RETBLEED_INTEL_MSG")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 36dcfc5105be9..bf79ff6a1f662 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1460,8 +1460,10 @@ static void __init retbleed_update_mitigation(void)
+ retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
+ break;
+ default:
+- if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF)
++ if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) {
+ pr_err(RETBLEED_INTEL_MSG);
++ retbleed_mitigation = RETBLEED_MITIGATION_NONE;
++ }
+ }
+ }
+
+--
+2.51.0
+
--- /dev/null
+From 54968164c79970c4670228c2de8fd262e28c5c2e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Sep 2025 14:09:13 +0800
+Subject: arch: Add the macro COMPILE_OFFSETS to all the asm-offsets.c
+
+From: Menglong Dong <menglong8.dong@gmail.com>
+
+[ Upstream commit 35561bab768977c9e05f1f1a9bc00134c85f3e28 ]
+
+The include/generated/asm-offsets.h header is generated by Kbuild during
+compilation from arch/SRCARCH/kernel/asm-offsets.c. When we want to
+generate another similar offsets header file, a circular dependency can
+arise.
+
+For example, suppose we want to generate an offsets file
+include/generated/test.h, which is included in include/sched/sched.h. If
+we generate asm-offsets.h first, it fails, because include/sched/sched.h
+is included in asm-offsets.c and include/generated/test.h doesn't exist
+yet; if we generate test.h first, it can't succeed either, because
+include/generated/asm-offsets.h is included by it.
+
+On x86_64, the macro COMPILE_OFFSETS is used to avoid such a circular
+dependency: asm-offsets.h can be generated first, and while
+COMPILE_OFFSETS is defined, "generated/test.h" is not included.
+
+Define the macro COMPILE_OFFSETS in all the asm-offsets.c files for this
+purpose.
+
+Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/alpha/kernel/asm-offsets.c | 1 +
+ arch/arc/kernel/asm-offsets.c | 1 +
+ arch/arm/kernel/asm-offsets.c | 2 ++
+ arch/arm64/kernel/asm-offsets.c | 1 +
+ arch/csky/kernel/asm-offsets.c | 1 +
+ arch/hexagon/kernel/asm-offsets.c | 1 +
+ arch/loongarch/kernel/asm-offsets.c | 2 ++
+ arch/m68k/kernel/asm-offsets.c | 1 +
+ arch/microblaze/kernel/asm-offsets.c | 1 +
+ arch/mips/kernel/asm-offsets.c | 2 ++
+ arch/nios2/kernel/asm-offsets.c | 1 +
+ arch/openrisc/kernel/asm-offsets.c | 1 +
+ arch/parisc/kernel/asm-offsets.c | 1 +
+ arch/powerpc/kernel/asm-offsets.c | 1 +
+ arch/riscv/kernel/asm-offsets.c | 1 +
+ arch/s390/kernel/asm-offsets.c | 1 +
+ arch/sh/kernel/asm-offsets.c | 1 +
+ arch/sparc/kernel/asm-offsets.c | 1 +
+ arch/um/kernel/asm-offsets.c | 2 ++
+ arch/xtensa/kernel/asm-offsets.c | 1 +
+ 20 files changed, 24 insertions(+)
+
+diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c
+index 11c35cf45b461..cb205f22096d7 100644
+--- a/arch/alpha/kernel/asm-offsets.c
++++ b/arch/alpha/kernel/asm-offsets.c
+@@ -4,6 +4,7 @@
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/types.h>
+ #include <linux/stddef.h>
+diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
+index f77deb7991757..2978da85fcb65 100644
+--- a/arch/arc/kernel/asm-offsets.c
++++ b/arch/arc/kernel/asm-offsets.c
+@@ -2,6 +2,7 @@
+ /*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/sched.h>
+ #include <linux/mm.h>
+diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
+index 219cbc7e5d134..3840e1e22b751 100644
+--- a/arch/arm/kernel/asm-offsets.c
++++ b/arch/arm/kernel/asm-offsets.c
+@@ -7,6 +7,8 @@
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
++#define COMPILE_OFFSETS
++
+ #include <linux/compiler.h>
+ #include <linux/sched.h>
+ #include <linux/mm.h>
+diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
+index 5ff1942b04fcf..ea2d740db81c5 100644
+--- a/arch/arm64/kernel/asm-offsets.c
++++ b/arch/arm64/kernel/asm-offsets.c
+@@ -6,6 +6,7 @@
+ * 2001-2002 Keith Owens
+ * Copyright (C) 2012 ARM Ltd.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/arm_sdei.h>
+ #include <linux/sched.h>
+diff --git a/arch/csky/kernel/asm-offsets.c b/arch/csky/kernel/asm-offsets.c
+index d1e9035794733..5525c8e7e1d9e 100644
+--- a/arch/csky/kernel/asm-offsets.c
++++ b/arch/csky/kernel/asm-offsets.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0
+ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
++#define COMPILE_OFFSETS
+
+ #include <linux/sched.h>
+ #include <linux/kernel_stat.h>
+diff --git a/arch/hexagon/kernel/asm-offsets.c b/arch/hexagon/kernel/asm-offsets.c
+index 03a7063f94561..50eea9fa6f137 100644
+--- a/arch/hexagon/kernel/asm-offsets.c
++++ b/arch/hexagon/kernel/asm-offsets.c
+@@ -8,6 +8,7 @@
+ *
+ * Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/compat.h>
+ #include <linux/types.h>
+diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c
+index 8da0726777edb..110afd3cc8f34 100644
+--- a/arch/loongarch/kernel/asm-offsets.c
++++ b/arch/loongarch/kernel/asm-offsets.c
+@@ -4,6 +4,8 @@
+ *
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
++#define COMPILE_OFFSETS
++
+ #include <linux/types.h>
+ #include <linux/sched.h>
+ #include <linux/mm.h>
+diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c
+index 906d732305374..67a1990f9d748 100644
+--- a/arch/m68k/kernel/asm-offsets.c
++++ b/arch/m68k/kernel/asm-offsets.c
+@@ -9,6 +9,7 @@
+ * #defines from the assembly-language output.
+ */
+
++#define COMPILE_OFFSETS
+ #define ASM_OFFSETS_C
+
+ #include <linux/stddef.h>
+diff --git a/arch/microblaze/kernel/asm-offsets.c b/arch/microblaze/kernel/asm-offsets.c
+index 104c3ac5f30c8..b4b67d58e7f6a 100644
+--- a/arch/microblaze/kernel/asm-offsets.c
++++ b/arch/microblaze/kernel/asm-offsets.c
+@@ -7,6 +7,7 @@
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/init.h>
+ #include <linux/stddef.h>
+diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
+index cb1045ebab062..22c99a2cd5707 100644
+--- a/arch/mips/kernel/asm-offsets.c
++++ b/arch/mips/kernel/asm-offsets.c
+@@ -9,6 +9,8 @@
+ * Kevin Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
+ * Copyright (C) 2000 MIPS Technologies, Inc.
+ */
++#define COMPILE_OFFSETS
++
+ #include <linux/compat.h>
+ #include <linux/types.h>
+ #include <linux/sched.h>
+diff --git a/arch/nios2/kernel/asm-offsets.c b/arch/nios2/kernel/asm-offsets.c
+index e3d9b7b6fb48a..88190b503ce5d 100644
+--- a/arch/nios2/kernel/asm-offsets.c
++++ b/arch/nios2/kernel/asm-offsets.c
+@@ -2,6 +2,7 @@
+ /*
+ * Copyright (C) 2011 Tobias Klauser <tklauser@distanz.ch>
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/stddef.h>
+ #include <linux/sched.h>
+diff --git a/arch/openrisc/kernel/asm-offsets.c b/arch/openrisc/kernel/asm-offsets.c
+index 710651d5aaae1..3cc826f2216b1 100644
+--- a/arch/openrisc/kernel/asm-offsets.c
++++ b/arch/openrisc/kernel/asm-offsets.c
+@@ -18,6 +18,7 @@
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/signal.h>
+ #include <linux/sched.h>
+diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
+index 757816a7bd4b2..9abfe65492c65 100644
+--- a/arch/parisc/kernel/asm-offsets.c
++++ b/arch/parisc/kernel/asm-offsets.c
+@@ -13,6 +13,7 @@
+ * Copyright (C) 2002 Randolph Chung <tausq with parisc-linux.org>
+ * Copyright (C) 2003 James Bottomley <jejb at parisc-linux.org>
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/types.h>
+ #include <linux/sched.h>
+diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
+index 2affd30468bc4..e2cee2f2ededd 100644
+--- a/arch/powerpc/kernel/asm-offsets.c
++++ b/arch/powerpc/kernel/asm-offsets.c
+@@ -8,6 +8,7 @@
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/compat.h>
+ #include <linux/signal.h>
+diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
+index 6a992cba2f287..e4589457e6085 100644
+--- a/arch/riscv/kernel/asm-offsets.c
++++ b/arch/riscv/kernel/asm-offsets.c
+@@ -3,6 +3,7 @@
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/kbuild.h>
+ #include <linux/mm.h>
+diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
+index fa5f6885c74aa..73a989dcfe208 100644
+--- a/arch/s390/kernel/asm-offsets.c
++++ b/arch/s390/kernel/asm-offsets.c
+@@ -4,6 +4,7 @@
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
++#define COMPILE_OFFSETS
+
+ #define ASM_OFFSETS_C
+
+diff --git a/arch/sh/kernel/asm-offsets.c b/arch/sh/kernel/asm-offsets.c
+index a0322e8328456..429b6a7631468 100644
+--- a/arch/sh/kernel/asm-offsets.c
++++ b/arch/sh/kernel/asm-offsets.c
+@@ -8,6 +8,7 @@
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/stddef.h>
+ #include <linux/types.h>
+diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c
+index 5784f2df489a4..f1e27a7f800f4 100644
+--- a/arch/sparc/kernel/asm-offsets.c
++++ b/arch/sparc/kernel/asm-offsets.c
+@@ -10,6 +10,7 @@
+ *
+ * On sparc, thread_info data is static and TI_XXX offsets are computed by hand.
+ */
++#define COMPILE_OFFSETS
+
+ #include <linux/sched.h>
+ #include <linux/mm_types.h>
+diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c
+index 1fb12235ab9c8..a69873aa697f4 100644
+--- a/arch/um/kernel/asm-offsets.c
++++ b/arch/um/kernel/asm-offsets.c
+@@ -1 +1,3 @@
++#define COMPILE_OFFSETS
++
+ #include <sysdep/kernel-offsets.h>
+diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
+index da38de20ae598..cfbced95e944a 100644
+--- a/arch/xtensa/kernel/asm-offsets.c
++++ b/arch/xtensa/kernel/asm-offsets.c
+@@ -11,6 +11,7 @@
+ *
+ * Chris Zankel <chris@zankel.net>
+ */
++#define COMPILE_OFFSETS
+
+ #include <asm/processor.h>
+ #include <asm/coprocessor.h>
+--
+2.51.0
+
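+The guard is consumed on the include side. Using the hypothetical test.h
+example from the changelog (these paths are illustrative, not in-tree
+files), a header that needs the second generated offsets file would hide
+that include while asm-offsets.c itself is being compiled:
+
+	/* kernel/sched/sched.h (hypothetical) */
+	#ifndef COMPILE_OFFSETS
+	#include <generated/test.h>	/* generated after asm-offsets.h exists */
+	#endif
+
+That way asm-offsets.c, which defines COMPILE_OFFSETS before any includes,
+can be built before test.h has been generated, breaking the cycle.
+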
--- /dev/null
+From 7a103238fdb26a551efc3eec1a75f8d386103d02 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Aug 2025 17:04:07 -0400
+Subject: audit: record fanotify event regardless of presence of rules
+
+From: Richard Guy Briggs <rgb@redhat.com>
+
+[ Upstream commit ce8370e2e62a903e18be7dd0e0be2eee079501e1 ]
+
+When no audit rules are in place, fanotify event results are
+unconditionally dropped due to an explicit check for the existence of
+any audit rules. Given this is a report from another security
+sub-system, allow it to be recorded regardless of the existence of any
+audit rules.
+
+To test, install and run the fapolicyd daemon with default config. Then
+as an unprivileged user, create and run a very simple binary that should
+be denied. Then check for an event with
+ ausearch -m FANOTIFY -ts recent
+
+Link: https://issues.redhat.com/browse/RHEL-9065
+Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/audit.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/linux/audit.h b/include/linux/audit.h
+index 335e1ba5a2327..7ca75f8873799 100644
+--- a/include/linux/audit.h
++++ b/include/linux/audit.h
+@@ -526,7 +526,7 @@ static inline void audit_log_kern_module(const char *name)
+
+ static inline void audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar)
+ {
+- if (!audit_dummy_context())
++ if (audit_enabled)
+ __audit_fanotify(response, friar);
+ }
+
+--
+2.51.0
+
--- /dev/null
+From 012af0d8a5f2d9c3d7e993a07113cefeca540801 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Aug 2025 12:10:28 +0100
+Subject: btrfs: always drop log root tree reference in btrfs_replay_log()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 2f5b8095ea47b142c56c09755a8b1e14145a2d30 ]
+
+Currently we have this odd behaviour:
+
+1) At btrfs_replay_log() we drop the reference of the log root tree if
+ the call to btrfs_recover_log_trees() failed;
+
+2) But if the call to btrfs_recover_log_trees() did not fail, we don't
+ drop the reference in btrfs_replay_log() - we expect that
+ btrfs_recover_log_trees() does it in case it returns success.
+
+Let's simplify this and make btrfs_replay_log() always drop the reference
+on the log root tree, not only this simplifies code as it's what makes
+sense since it's btrfs_replay_log() who grabbed the reference in the first
+place.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/disk-io.c | 2 +-
+ fs/btrfs/tree-log.c | 1 -
+ 2 files changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index bb5f7911d473c..7ad1734cbbfc9 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -2080,10 +2080,10 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
+
+ /* returns with log_tree_root freed on success */
+ ret = btrfs_recover_log_trees(log_tree_root);
++ btrfs_put_root(log_tree_root);
+ if (ret) {
+ btrfs_handle_fs_error(fs_info, ret,
+ "Failed to recover log tree");
+- btrfs_put_root(log_tree_root);
+ return ret;
+ }
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 4b53e19f7520f..e00298c6c30a1 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -7422,7 +7422,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
+
+ log_root_tree->log_root = NULL;
+ clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
+- btrfs_put_root(log_root_tree);
+
+ return 0;
+ error:
+--
+2.51.0
+
--- /dev/null
+From 33914610d5e0981512b297973a9438c04ce73add Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Sep 2025 17:01:44 +0200
+Subject: btrfs: scrub: replace max_t()/min_t() with clamp() in
+ scrub_throttle_dev_io()
+
+From: Thorsten Blum <thorsten.blum@linux.dev>
+
+[ Upstream commit a7f3dfb8293c4cee99743132d69863a92e8f4875 ]
+
+Replace max_t() followed by min_t() with a single clamp().
+
+As was pointed out by David Laight in
+https://lore.kernel.org/linux-btrfs/20250906122458.75dfc8f0@pumpkin/
+the calculation may overflow u32 when the input value is too large, so
+clamp_t() is not used. In practice the expected values are in the range of
+megabytes to gigabytes (throughput limit), so the bug would not happen.
+
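+As a minimal sketch of the distinction (kernel-style fragment; bwlimit is
+assumed to be the u64 bytes-per-second limit used by scrub.c):
+
+	u64 bwlimit;	/* per-device scrub throughput limit, bytes/sec */
+	u32 div;
+
+	/*
+	 * clamp_t(u32, ...) would cast the u64 quotient down to u32 before
+	 * comparing, so an absurdly large limit could wrap and yield a wrong
+	 * (too small) divisor.  clamp() compares in the quotient's own u64
+	 * type; only the already-bounded 1..64 result lands in the u32.
+	 */
+	div = clamp(bwlimit / (16 * 1024 * 1024), 1, 64);
+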
+Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ Use clamp() and add explanation. ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/scrub.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
+index 7632d652a1257..4a5a5ee360e57 100644
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -1271,8 +1271,7 @@ static void scrub_throttle_dev_io(struct scrub_ctx *sctx, struct btrfs_device *d
+ * Slice is divided into intervals when the IO is submitted, adjust by
+ * bwlimit and maximum of 64 intervals.
+ */
+- div = max_t(u32, 1, (u32)(bwlimit / (16 * 1024 * 1024)));
+- div = min_t(u32, 64, div);
++ div = clamp(bwlimit / (16 * 1024 * 1024), 1, 64);
+
+ /* Start new epoch, set deadline */
+ now = ktime_get();
+--
+2.51.0
+
--- /dev/null
+From 2a84dc26e9fa0d7a677021843e9d860d72f6a485 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Aug 2025 17:46:18 +0100
+Subject: btrfs: use level argument in log tree walk callback
+ replay_one_buffer()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 6cb7f0b8c9b0d6a35682335fea88bd26f089306f ]
+
+We already have the extent buffer's level in an argument, so there's no need
+to first ensure the extent buffer's data is loaded (by calling
+btrfs_read_extent_buffer()) and then call btrfs_header_level() to check
+the level. So use the level argument and do the check before calling
+btrfs_read_extent_buffer().
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-log.c | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index e00298c6c30a1..5512991b24faa 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -2493,15 +2493,13 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
+ int i;
+ int ret;
+
++ if (level != 0)
++ return 0;
++
+ ret = btrfs_read_extent_buffer(eb, &check);
+ if (ret)
+ return ret;
+
+- level = btrfs_header_level(eb);
+-
+- if (level != 0)
+- return 0;
+-
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+--
+2.51.0
+
--- /dev/null
+From 4e2e37c8c157fbd155fedbe333bfb6f4e13941f1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Sep 2025 12:09:14 +0100
+Subject: btrfs: use smp_mb__after_atomic() when forcing COW in
+ create_pending_snapshot()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 45c222468d33202c07c41c113301a4b9c8451b8f ]
+
+After setting the BTRFS_ROOT_FORCE_COW flag on the root we are doing a
+full write barrier, smp_wmb(), but we don't need to, all we need is a
+smp_mb__after_atomic(). The use of the smp_wmb() is from the old days
+when we didn't use a bit and used instead an int field in the root to
+signal if cow is forced. After the int field was changed to a bit in
+the root's state (flags field), we forgot to update the memory barrier
+in create_pending_snapshot() to smp_mb__after_atomic(), but we did the
+change in commit_fs_roots() after clearing BTRFS_ROOT_FORCE_COW. That
+happened in commit 27cdeb7096b8 ("Btrfs: use bitfield instead of integer
+data type for the some variants in btrfs_root"). On the reader side, in
+should_cow_block(), we also use the counterpart smp_mb__before_atomic()
+which generates further confusion.
+
+So change the smp_wmb() to smp_mb__after_atomic(). In fact we don't
+even need any barrier at all since create_pending_snapshot() is called
+in the critical section of a transaction commit and therefore no one
+can concurrently join/attach the transaction, or start a new one, until
+the transaction is unblocked. By the time someone starts a new transaction
+and enters should_cow_block(), a lot of implicit memory barriers already
+took place by having acquired several locks such as fs_info->trans_lock
+and extent buffer locks on the root node at least. Nevertheless, for
+consistency use smp_mb__after_atomic() after setting the force cow bit
+in create_pending_snapshot().
+
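+For reference, a condensed kernel-style sketch of the pairing described
+above (not a verbatim copy of the btrfs functions):
+
+	/* writer, create_pending_snapshot(): publish the force-COW decision */
+	set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
+	smp_mb__after_atomic();		/* pairs with the reader below */
+
+	/* reader, should_cow_block(): observe the decision */
+	smp_mb__before_atomic();
+	if (test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
+		return true;		/* always COW blocks of this root */
+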
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/transaction.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index 3989cb19cdae7..20add63421b3d 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -1796,7 +1796,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+ }
+ /* see comments in should_cow_block() */
+ set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
+- smp_wmb();
++ smp_mb__after_atomic();
+
+ btrfs_set_root_node(new_root_item, tmp);
+ /* record when the snapshot was created in key.offset */
+--
+2.51.0
+
--- /dev/null
+From ffa3e67ee0a42fb8a270a866991cde00d27090a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jul 2025 11:13:15 +0900
+Subject: btrfs: zoned: refine extent allocator hint selection
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 0d703963d297964451783e1a0688ebdf74cd6151 ]
+
+The hint block group selection in the extent allocator is wrong in the
+first place, as it can select the dedicated data relocation block group for
+the normal data allocation.
+
+Since we separated the normal data space_info and the data relocation
+space_info, we can easily identify whether a block group is for data
+relocation or not. Do not choose it for normal data allocations.
+
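+A condensed sketch of the extra test in the hint scan (a fragment of the
+idea, not the full loop from prepare_allocation_zoned()):
+
+	/*
+	 * Block groups that belong to another space_info, e.g. the dedicated
+	 * data relocation block group while doing a normal data allocation,
+	 * must not be picked as the allocator hint.
+	 */
+	if (block_group->space_info != space_info)
+		continue;
+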
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent-tree.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index 8248113eb067f..5e3d1a87b7e9d 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -4175,7 +4175,8 @@ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
+ }
+
+ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
+- struct find_free_extent_ctl *ffe_ctl)
++ struct find_free_extent_ctl *ffe_ctl,
++ struct btrfs_space_info *space_info)
+ {
+ if (ffe_ctl->for_treelog) {
+ spin_lock(&fs_info->treelog_bg_lock);
+@@ -4199,6 +4200,7 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
+ u64 avail = block_group->zone_capacity - block_group->alloc_offset;
+
+ if (block_group_bits(block_group, ffe_ctl->flags) &&
++ block_group->space_info == space_info &&
+ avail >= ffe_ctl->num_bytes) {
+ ffe_ctl->hint_byte = block_group->start;
+ break;
+@@ -4220,7 +4222,7 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
+ return prepare_allocation_clustered(fs_info, ffe_ctl,
+ space_info, ins);
+ case BTRFS_EXTENT_ALLOC_ZONED:
+- return prepare_allocation_zoned(fs_info, ffe_ctl);
++ return prepare_allocation_zoned(fs_info, ffe_ctl, space_info);
+ default:
+ BUG();
+ }
+--
+2.51.0
+
--- /dev/null
+From 34e6b449c4d896095ab2e81088393f75b5995c52 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Jul 2025 13:39:11 +0200
+Subject: btrfs: zoned: return error from btrfs_zone_finish_endio()
+
+From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+
+[ Upstream commit 3c44cd3c79fcb38a86836dea6ff8fec322a9e68c ]
+
+Now that btrfs_zone_finish_endio_workfn() is directly calling
+do_zone_finish(), the only caller of btrfs_zone_finish_endio() is
+btrfs_finish_one_ordered().
+
+btrfs_finish_one_ordered() already has error handling in-place so
+btrfs_zone_finish_endio() can return an error if the block group lookup
+fails.
+
+Also as btrfs_zone_finish_endio() already checks for zoned filesystems and
+returns early, there's no need to do this in the caller.
+
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/inode.c | 7 ++++---
+ fs/btrfs/zoned.c | 8 +++++---
+ fs/btrfs/zoned.h | 9 ++++++---
+ 3 files changed, 15 insertions(+), 9 deletions(-)
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index ee5ffeab85bb7..b1be3e0fe7282 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -3051,9 +3051,10 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
+ goto out;
+ }
+
+- if (btrfs_is_zoned(fs_info))
+- btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
+- ordered_extent->disk_num_bytes);
++ ret = btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
++ ordered_extent->disk_num_bytes);
++ if (ret)
++ goto out;
+
+ if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
+ truncated = true;
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 3622ba1d8e09f..6e8b8c46ba18f 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -2263,16 +2263,17 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
+ return ret;
+ }
+
+-void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
++int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
+ {
+ struct btrfs_block_group *block_group;
+ u64 min_alloc_bytes;
+
+ if (!btrfs_is_zoned(fs_info))
+- return;
++ return 0;
+
+ block_group = btrfs_lookup_block_group(fs_info, logical);
+- ASSERT(block_group);
++ if (WARN_ON_ONCE(!block_group))
++ return -ENOENT;
+
+ /* No MIXED_BG on zoned btrfs. */
+ if (block_group->flags & BTRFS_BLOCK_GROUP_DATA)
+@@ -2289,6 +2290,7 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
+
+ out:
+ btrfs_put_block_group(block_group);
++ return 0;
+ }
+
+ static void btrfs_zone_finish_endio_workfn(struct work_struct *work)
+diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
+index 448955641d114..c18f31d3dc25f 100644
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -71,7 +71,7 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
+ bool btrfs_zone_activate(struct btrfs_block_group *block_group);
+ int btrfs_zone_finish(struct btrfs_block_group *block_group);
+ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags);
+-void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
++int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
+ u64 length);
+ void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+ struct extent_buffer *eb);
+@@ -227,8 +227,11 @@ static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
+ return true;
+ }
+
+-static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
+- u64 logical, u64 length) { }
++static inline int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
++ u64 logical, u64 length)
++{
++ return 0;
++}
+
+ static inline void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
+ struct extent_buffer *eb) { }
+--
+2.51.0
+
--- /dev/null
+From 762f27a0e18a98a9747de23af1f4ad13e83ccecf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Sep 2025 20:30:17 +0000
+Subject: EDAC/mc_sysfs: Increase legacy channel support to 16
+
+From: Avadhut Naik <avadhut.naik@amd.com>
+
+[ Upstream commit 6e1c2c6c2c40ce99e0d2633b212f43c702c1a002 ]
+
+Newer AMD systems can support up to 16 channels per EDAC "mc" device.
+These are detected by the EDAC module running on the device, and the
+current EDAC interface is appropriately enumerated.
+
+The legacy EDAC sysfs interface, however, provides device attributes for
+channels 0 through 11 only. Consequently, the last four channels, 12
+through 15, will not be enumerated and will not be visible through the
+legacy sysfs interface.
+
+Add additional device attributes to ensure that all 16 channels, if
+present, are enumerated by and visible through the legacy EDAC sysfs
+interface.
+
+Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/20250916203242.1281036-1-avadhut.naik@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/edac/edac_mc_sysfs.c | 24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
+index 15f63452a9bec..b01436d9ddaed 100644
+--- a/drivers/edac/edac_mc_sysfs.c
++++ b/drivers/edac/edac_mc_sysfs.c
+@@ -306,6 +306,14 @@ DEVICE_CHANNEL(ch10_dimm_label, S_IRUGO | S_IWUSR,
+ channel_dimm_label_show, channel_dimm_label_store, 10);
+ DEVICE_CHANNEL(ch11_dimm_label, S_IRUGO | S_IWUSR,
+ channel_dimm_label_show, channel_dimm_label_store, 11);
++DEVICE_CHANNEL(ch12_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 12);
++DEVICE_CHANNEL(ch13_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 13);
++DEVICE_CHANNEL(ch14_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 14);
++DEVICE_CHANNEL(ch15_dimm_label, S_IRUGO | S_IWUSR,
++ channel_dimm_label_show, channel_dimm_label_store, 15);
+
+ /* Total possible dynamic DIMM Label attribute file table */
+ static struct attribute *dynamic_csrow_dimm_attr[] = {
+@@ -321,6 +329,10 @@ static struct attribute *dynamic_csrow_dimm_attr[] = {
+ &dev_attr_legacy_ch9_dimm_label.attr.attr,
+ &dev_attr_legacy_ch10_dimm_label.attr.attr,
+ &dev_attr_legacy_ch11_dimm_label.attr.attr,
++ &dev_attr_legacy_ch12_dimm_label.attr.attr,
++ &dev_attr_legacy_ch13_dimm_label.attr.attr,
++ &dev_attr_legacy_ch14_dimm_label.attr.attr,
++ &dev_attr_legacy_ch15_dimm_label.attr.attr,
+ NULL
+ };
+
+@@ -349,6 +361,14 @@ DEVICE_CHANNEL(ch10_ce_count, S_IRUGO,
+ channel_ce_count_show, NULL, 10);
+ DEVICE_CHANNEL(ch11_ce_count, S_IRUGO,
+ channel_ce_count_show, NULL, 11);
++DEVICE_CHANNEL(ch12_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 12);
++DEVICE_CHANNEL(ch13_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 13);
++DEVICE_CHANNEL(ch14_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 14);
++DEVICE_CHANNEL(ch15_ce_count, S_IRUGO,
++ channel_ce_count_show, NULL, 15);
+
+ /* Total possible dynamic ce_count attribute file table */
+ static struct attribute *dynamic_csrow_ce_count_attr[] = {
+@@ -364,6 +384,10 @@ static struct attribute *dynamic_csrow_ce_count_attr[] = {
+ &dev_attr_legacy_ch9_ce_count.attr.attr,
+ &dev_attr_legacy_ch10_ce_count.attr.attr,
+ &dev_attr_legacy_ch11_ce_count.attr.attr,
++ &dev_attr_legacy_ch12_ce_count.attr.attr,
++ &dev_attr_legacy_ch13_ce_count.attr.attr,
++ &dev_attr_legacy_ch14_ce_count.attr.attr,
++ &dev_attr_legacy_ch15_ce_count.attr.attr,
+ NULL
+ };
+
+--
+2.51.0
+
--- /dev/null
+From 802862b01265aaa81829f67331259dede4a3354b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Aug 2025 14:03:40 -0400
+Subject: perf: Have get_perf_callchain() return NULL if crosstask and user are
+ set
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+[ Upstream commit 153f9e74dec230f2e070e16fa061bc7adfd2c450 ]
+
+get_perf_callchain() doesn't support cross-task unwinding for user space
+stacks, so have it return NULL if both the crosstask and user arguments
+are set.
+
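+Callers now have to tolerate a NULL return for that combination; a hedged
+caller-side sketch (argument order as in this kernel's signature, with an
+empty-entry fallback in the style of perf core's __empty_callchain):
+
+	struct perf_callchain_entry *callchain;
+
+	/* NULL now simply means "no callchain available". */
+	callchain = get_perf_callchain(regs, 0, kernel, user, max_stack,
+				       crosstask, true);
+	if (!callchain)
+		callchain = &__empty_callchain;
+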
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20250820180428.426423415@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/callchain.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
+index 65fea424874c5..ee01cfcc35064 100644
+--- a/kernel/events/callchain.c
++++ b/kernel/events/callchain.c
+@@ -184,6 +184,10 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+ struct perf_callchain_entry_ctx ctx;
+ int rctx;
+
++ /* crosstask is not supported for user stacks */
++ if (crosstask && user && !kernel)
++ return NULL;
++
+ entry = get_callchain_entry(&rctx);
+ if (!entry)
+ return NULL;
+@@ -200,7 +204,7 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+ perf_callchain_kernel(&ctx, regs);
+ }
+
+- if (user) {
++ if (user && !crosstask) {
+ if (!user_mode(regs)) {
+ if (current->flags & (PF_KTHREAD | PF_USER_WORKER))
+ regs = NULL;
+@@ -209,9 +213,6 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+ }
+
+ if (regs) {
+- if (crosstask)
+- goto exit_put;
+-
+ if (add_mark)
+ perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
+
+@@ -219,7 +220,6 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+ }
+ }
+
+-exit_put:
+ put_callchain_entry(rctx);
+
+ return entry;
+--
+2.51.0
+
--- /dev/null
+From f7b7c12558c04bfe26dfb17cefd5143e4e8e98f9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Aug 2025 14:03:43 -0400
+Subject: perf: Skip user unwind if the task is a kernel thread
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+[ Upstream commit 16ed389227651330879e17bd83d43bd234006722 ]
+
+If the task is not a user thread, there's no user stack to unwind.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20250820180428.930791978@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/core.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index 3eb9125431b43..c9a3fb6fdb2f6 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -7724,7 +7724,8 @@ struct perf_callchain_entry *
+ perf_callchain(struct perf_event *event, struct pt_regs *regs)
+ {
+ bool kernel = !event->attr.exclude_callchain_kernel;
+- bool user = !event->attr.exclude_callchain_user;
++ bool user = !event->attr.exclude_callchain_user &&
++ !(current->flags & (PF_KTHREAD | PF_USER_WORKER));
+ /* Disallow cross-task user callchains. */
+ bool crosstask = event->ctx->task && event->ctx->task != current;
+ const u32 max_stack = event->attr.sample_max_stack;
+--
+2.51.0
+
--- /dev/null
+From a77239aceb868da27cb7d047c23b0e2c38130faf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Aug 2025 14:03:41 -0400
+Subject: perf: Use current->flags & PF_KTHREAD|PF_USER_WORKER instead of
+ current->mm == NULL
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+[ Upstream commit 90942f9fac05702065ff82ed0bade0d08168d4ea ]
+
+To determine if a task is a kernel thread or not, it is more reliable to
+use (current->flags & (PF_KTHREAD|PF_USER_WORKER)) than to rely on
+current->mm being NULL. That is because some kernel tasks (io_uring
+helpers) may have a non-NULL mm.
+
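+A kernel-style sketch of the check this series standardizes on (the helper
+name is made up purely for illustration; the patches open-code the test):
+
+	#include <linux/sched.h>
+
+	/*
+	 * True when 'current' has no user context to sample: kernel threads
+	 * and user worker threads.  As noted above, some of these may still
+	 * carry a non-NULL ->mm, so "current->mm == NULL" is not equivalent.
+	 */
+	static inline bool current_has_no_user_context(void)
+	{
+		return !!(current->flags & (PF_KTHREAD | PF_USER_WORKER));
+	}
+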
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20250820180428.592367294@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/callchain.c | 6 +++---
+ kernel/events/core.c | 4 ++--
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
+index 1273be84392cf..65fea424874c5 100644
+--- a/kernel/events/callchain.c
++++ b/kernel/events/callchain.c
+@@ -202,10 +202,10 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
+
+ if (user) {
+ if (!user_mode(regs)) {
+- if (current->mm)
+- regs = task_pt_regs(current);
+- else
++ if (current->flags & (PF_KTHREAD | PF_USER_WORKER))
+ regs = NULL;
++ else
++ regs = task_pt_regs(current);
+ }
+
+ if (regs) {
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index b73f5c44113d6..3eb9125431b43 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -6985,7 +6985,7 @@ static void perf_sample_regs_user(struct perf_regs *regs_user,
+ if (user_mode(regs)) {
+ regs_user->abi = perf_reg_abi(current);
+ regs_user->regs = regs;
+- } else if (!(current->flags & PF_KTHREAD)) {
++ } else if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+ perf_get_regs_user(regs_user, regs);
+ } else {
+ regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
+@@ -7612,7 +7612,7 @@ static u64 perf_virt_to_phys(u64 virt)
+ * Try IRQ-safe get_user_page_fast_only first.
+ * If failed, leave phys_addr as 0.
+ */
+- if (current->mm != NULL) {
++ if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
+ struct page *p;
+
+ pagefault_disable();
+--
+2.51.0
+
net-sched-sch_qfq-fix-null-deref-in-agg_dequeue.patch
+audit-record-fanotify-event-regardless-of-presence-o.patch
+perf-use-current-flags-pf_kthread-pf_user_worker-ins.patch
+perf-have-get_perf_callchain-return-null-if-crosstas.patch
+perf-skip-user-unwind-if-the-task-is-a-kernel-thread.patch
+x86-bugs-report-correct-retbleed-mitigation-status.patch
+x86-bugs-fix-reporting-of-lfence-retpoline.patch
+edac-mc_sysfs-increase-legacy-channel-support-to-16.patch
+btrfs-zoned-return-error-from-btrfs_zone_finish_endi.patch
+btrfs-zoned-refine-extent-allocator-hint-selection.patch
+btrfs-scrub-replace-max_t-min_t-with-clamp-in-scrub_.patch
+btrfs-always-drop-log-root-tree-reference-in-btrfs_r.patch
+btrfs-use-level-argument-in-log-tree-walk-callback-r.patch
+btrfs-use-smp_mb__after_atomic-when-forcing-cow-in-c.patch
+arch-add-the-macro-compile_offsets-to-all-the-asm-of.patch
--- /dev/null
+From 9e19126d42b14a90eced153d573608f84adf3db2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Sep 2025 08:47:05 -0500
+Subject: x86/bugs: Fix reporting of LFENCE retpoline
+
+From: David Kaplan <david.kaplan@amd.com>
+
+[ Upstream commit d1cc1baef67ac6c09b74629ca053bf3fb812f7dc ]
+
+The LFENCE retpoline mitigation is not secure but the kernel prints
+inconsistent messages about this fact. The dmesg log says 'Mitigation:
+LFENCE', implying the system is mitigated. But sysfs reports 'Vulnerable:
+LFENCE', implying the system (correctly) is not mitigated.
+
+Fix this by printing a consistent 'Vulnerable: LFENCE' string everywhere
+when this mitigation is selected.
+
+Signed-off-by: David Kaplan <david.kaplan@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/20250915134706.3201818-1-david.kaplan@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index f66e0e5b49eb1..ef1d3a5024ed4 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1594,7 +1594,7 @@ spectre_v2_user_select_mitigation(void)
+ static const char * const spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+ [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines",
+- [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE",
++ [SPECTRE_V2_LFENCE] = "Vulnerable: LFENCE",
+ [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced / Automatic IBRS",
+ [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced / Automatic IBRS + LFENCE",
+ [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced / Automatic IBRS + Retpolines",
+@@ -3222,9 +3222,6 @@ static const char *spectre_bhi_state(void)
+
+ static ssize_t spectre_v2_show_state(char *buf)
+ {
+- if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+- return sysfs_emit(buf, "Vulnerable: LFENCE\n");
+-
+ if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+ return sysfs_emit(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
+
+--
+2.51.0
+
--- /dev/null
+From c0f1ee31d227222ce9307b53bc63615f5c5fc2b6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Sep 2025 08:47:06 -0500
+Subject: x86/bugs: Report correct retbleed mitigation status
+
+From: David Kaplan <david.kaplan@amd.com>
+
+[ Upstream commit 930f2361fe542a00de9ce6070b1b6edb976f1165 ]
+
+On Intel CPUs, the default retbleed mitigation is IBRS/eIBRS but this
+requires that a similar spectre_v2 mitigation is applied. If the user
+selects a different spectre_v2 mitigation (like spectre_v2=retpoline) a
+warning is printed but sysfs will still report 'Mitigation: IBRS' or
+'Mitigation: Enhanced IBRS'. This is incorrect because retbleed is not
+mitigated, and IBRS is not actually set.
+
+Fix this by choosing RETBLEED_MITIGATION_NONE in this scenario so the
+kernel correctly reports the system as vulnerable to retbleed.
+
+Signed-off-by: David Kaplan <david.kaplan@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/20250915134706.3201818-1-david.kaplan@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/bugs.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 315926ccea0fa..f66e0e5b49eb1 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1185,8 +1185,10 @@ static void __init retbleed_select_mitigation(void)
+ retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
+ break;
+ default:
+- if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF)
++ if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) {
+ pr_err(RETBLEED_INTEL_MSG);
++ retbleed_mitigation = RETBLEED_MITIGATION_NONE;
++ }
+ }
+ }
+
+--
+2.51.0
+