From: Sasha Levin Date: Thu, 12 Dec 2024 01:10:50 +0000 (-0500) Subject: Fixes for 6.12 X-Git-Tag: v5.4.287~64 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2f64cb86fd19e3f23657293826b42af14b6c7122;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.12 Signed-off-by: Sasha Levin --- diff --git a/queue-6.12/btrfs-drop-unused-parameter-data-from-btrfs_fill_sup.patch b/queue-6.12/btrfs-drop-unused-parameter-data-from-btrfs_fill_sup.patch new file mode 100644 index 00000000000..f72121003e2 --- /dev/null +++ b/queue-6.12/btrfs-drop-unused-parameter-data-from-btrfs_fill_sup.patch @@ -0,0 +1,47 @@ +From 91a767d8756f715a77a66c8eac7ae6af942a8cf0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Oct 2024 16:32:17 +0200 +Subject: btrfs: drop unused parameter data from btrfs_fill_super() + +From: David Sterba + +[ Upstream commit 01c5db782e3ad1aea1d06a1765c710328c145f10 ] + +The only caller passes NULL, we can drop the parameter. This is since +the new mount option parser done in 3bb17a25bcb09a ("btrfs: add get_tree +callback for new mount API"). 
+ +Reviewed-by: Anand Jain +Signed-off-by: David Sterba +Stable-dep-of: 951a3f59d268 ("btrfs: fix mount failure due to remount races") +Signed-off-by: Sasha Levin +--- + fs/btrfs/super.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c +index 1a4225a1a2003..0c477443fbc5f 100644 +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -946,8 +946,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec + } + + static int btrfs_fill_super(struct super_block *sb, +- struct btrfs_fs_devices *fs_devices, +- void *data) ++ struct btrfs_fs_devices *fs_devices) + { + struct inode *inode; + struct btrfs_fs_info *fs_info = btrfs_sb(sb); +@@ -1893,7 +1892,7 @@ static int btrfs_get_tree_super(struct fs_context *fc) + snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev); + shrinker_debugfs_rename(sb->s_shrink, "sb-btrfs:%s", sb->s_id); + btrfs_sb(sb)->bdev_holder = &btrfs_fs_type; +- ret = btrfs_fill_super(sb, fs_devices, NULL); ++ ret = btrfs_fill_super(sb, fs_devices); + } + + if (ret) { +-- +2.43.0 + diff --git a/queue-6.12/btrfs-drop-unused-parameter-options-from-open_ctree.patch b/queue-6.12/btrfs-drop-unused-parameter-options-from-open_ctree.patch new file mode 100644 index 00000000000..d8e557f53c8 --- /dev/null +++ b/queue-6.12/btrfs-drop-unused-parameter-options-from-open_ctree.patch @@ -0,0 +1,67 @@ +From f59f0cae5864909ffc3b1bf7d87c37b3e1f3220f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Oct 2024 16:32:07 +0200 +Subject: btrfs: drop unused parameter options from open_ctree() + +From: David Sterba + +[ Upstream commit 87cbab86366e75dec52f787e0e0b17b2aea769ca ] + +Since the new mount option parser in commit ad21f15b0f79 ("btrfs: +switch to the new mount API") we don't pass the options like that +anymore. 
+ +Reviewed-by: Anand Jain +Signed-off-by: David Sterba +Stable-dep-of: 951a3f59d268 ("btrfs: fix mount failure due to remount races") +Signed-off-by: Sasha Levin +--- + fs/btrfs/disk-io.c | 3 +-- + fs/btrfs/disk-io.h | 3 +-- + fs/btrfs/super.c | 2 +- + 3 files changed, 3 insertions(+), 5 deletions(-) + +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index b11bfe68dd65f..43b7b331b2da3 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -3202,8 +3202,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount) + return 0; + } + +-int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, +- const char *options) ++int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices) + { + u32 sectorsize; + u32 nodesize; +diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h +index 99af64d3f2778..127e31e083470 100644 +--- a/fs/btrfs/disk-io.h ++++ b/fs/btrfs/disk-io.h +@@ -52,8 +52,7 @@ struct extent_buffer *btrfs_find_create_tree_block( + int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info); + int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, + const struct btrfs_super_block *disk_sb); +-int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, +- const char *options); ++int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices); + void __cold close_ctree(struct btrfs_fs_info *fs_info); + int btrfs_validate_super(const struct btrfs_fs_info *fs_info, + const struct btrfs_super_block *sb, int mirror_num); +diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c +index c64d071341223..1a4225a1a2003 100644 +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -971,7 +971,7 @@ static int btrfs_fill_super(struct super_block *sb, + return err; + } + +- err = open_ctree(sb, fs_devices, (char *)data); ++ err = open_ctree(sb, fs_devices); + if (err) { + btrfs_err(fs_info, "open_ctree failed"); + return err; +-- +2.43.0 + diff --git 
a/queue-6.12/btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch b/queue-6.12/btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch new file mode 100644 index 00000000000..4480c050ab2 --- /dev/null +++ b/queue-6.12/btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch @@ -0,0 +1,41 @@ +From 9dc0d693919ba0967d23b54473a4faabbdb8022f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Nov 2024 13:33:03 +0000 +Subject: btrfs: fix missing snapshot drew unlock when root is dead during swap + activation + +From: Filipe Manana + +[ Upstream commit 9c803c474c6c002d8ade68ebe99026cc39c37f85 ] + +When activating a swap file we acquire the root's snapshot drew lock and +then check if the root is dead, failing and returning with -EPERM if it's +dead but without unlocking the root's snapshot lock. Fix this by adding +the missing unlock. + +Fixes: 60021bd754c6 ("btrfs: prevent subvol with swapfile from being deleted") +Reviewed-by: Johannes Thumshirn +Reviewed-by: David Sterba +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/inode.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index d067db2619713..58ffe78132d9d 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -9857,6 +9857,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, + if (btrfs_root_dead(root)) { + spin_unlock(&root->root_item_lock); + ++ btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_exclop_finish(fs_info); + btrfs_warn(fs_info, + "cannot activate swapfile because subvolume %llu is being deleted", +-- +2.43.0 + diff --git a/queue-6.12/btrfs-fix-mount-failure-due-to-remount-races.patch b/queue-6.12/btrfs-fix-mount-failure-due-to-remount-races.patch new file mode 100644 index 00000000000..34b1b524eaa --- /dev/null +++ b/queue-6.12/btrfs-fix-mount-failure-due-to-remount-races.patch @@ -0,0 +1,192 @@ +From 
2cf783d86b37ca3ec58fa29779e57b62f887eeb9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 30 Oct 2024 11:25:48 +1030 +Subject: btrfs: fix mount failure due to remount races + +From: Qu Wenruo + +[ Upstream commit 951a3f59d268fe1397aaeb9a96fcb1944890c4cb ] + +[BUG] +The following reproducer can cause btrfs mount to fail: + + dev="/dev/test/scratch1" + mnt1="/mnt/test" + mnt2="/mnt/scratch" + + mkfs.btrfs -f $dev + mount $dev $mnt1 + btrfs subvolume create $mnt1/subvol1 + btrfs subvolume create $mnt1/subvol2 + umount $mnt1 + + mount $dev $mnt1 -o subvol=subvol1 + while mount -o remount,ro $mnt1; do mount -o remount,rw $mnt1; done & + bg=$! + + while mount $dev $mnt2 -o subvol=subvol2; do umount $mnt2; done + + kill $bg + wait + umount -R $mnt1 + umount -R $mnt2 + +The script will fail with the following error: + + mount: /mnt/scratch: /dev/mapper/test-scratch1 already mounted on /mnt/test. + dmesg(1) may have more information after failed mount system call. + umount: /mnt/test: target is busy. + umount: /mnt/scratch/: not mounted + +And there is no kernel error message. + +[CAUSE] +During the btrfs mount, to support mounting different subvolumes with +different RO/RW flags, we need to detect that and retry if needed: + + Retry with matching RO flags if the initial mount fail with -EBUSY. + +The problem is, during that retry we do not hold any super block lock +(s_umount), this means there can be a remount process changing the RO +flags of the original fs super block. + +If so, we can have an EBUSY error during retry. And this time we treat +any failure as an error, without any retry and cause the above EBUSY +mount failure. + +[FIX] +The current retry behavior is racy because we do not have a super block +thus no way to hold s_umount to prevent the race with remount. + +Solve the root problem by allowing fc->sb_flags to mismatch from the +sb->s_flags at btrfs_get_tree_super(). 
+ +Then at the re-entry point btrfs_get_tree_subvol(), manually check the +fc->sb_flags against sb->s_flags, if it's a RO->RW mismatch, then +reconfigure with s_umount lock held. + +Reported-by: Enno Gotthold +Reported-by: Fabian Vogt +[ Special thanks for the reproducer and early analysis pointing to btrfs. ] +Fixes: f044b318675f ("btrfs: handle the ro->rw transition for mounting different subvolumes") +Link: https://bugzilla.suse.com/show_bug.cgi?id=1231836 +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/super.c | 66 ++++++++++++++++++++---------------------------- + 1 file changed, 27 insertions(+), 39 deletions(-) + +diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c +index 0c477443fbc5f..8292e488d3d77 100644 +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -1886,18 +1886,21 @@ static int btrfs_get_tree_super(struct fs_context *fc) + + if (sb->s_root) { + btrfs_close_devices(fs_devices); +- if ((fc->sb_flags ^ sb->s_flags) & SB_RDONLY) +- ret = -EBUSY; ++ /* ++ * At this stage we may have RO flag mismatch between ++ * fc->sb_flags and sb->s_flags. Caller should detect such ++ * mismatch and reconfigure with sb->s_umount rwsem held if ++ * needed. ++ */ + } else { + snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev); + shrinker_debugfs_rename(sb->s_shrink, "sb-btrfs:%s", sb->s_id); + btrfs_sb(sb)->bdev_holder = &btrfs_fs_type; + ret = btrfs_fill_super(sb, fs_devices); +- } +- +- if (ret) { +- deactivate_locked_super(sb); +- return ret; ++ if (ret) { ++ deactivate_locked_super(sb); ++ return ret; ++ } + } + + btrfs_clear_oneshot_options(fs_info); +@@ -1983,39 +1986,18 @@ static int btrfs_get_tree_super(struct fs_context *fc) + * btrfs or not, setting the whole super block RO. To make per-subvolume mounting + * work with different options work we need to keep backward compatibility. 
+ */ +-static struct vfsmount *btrfs_reconfigure_for_mount(struct fs_context *fc) ++static int btrfs_reconfigure_for_mount(struct fs_context *fc, struct vfsmount *mnt) + { +- struct vfsmount *mnt; +- int ret; +- const bool ro2rw = !(fc->sb_flags & SB_RDONLY); +- +- /* +- * We got an EBUSY because our SB_RDONLY flag didn't match the existing +- * super block, so invert our setting here and retry the mount so we +- * can get our vfsmount. +- */ +- if (ro2rw) +- fc->sb_flags |= SB_RDONLY; +- else +- fc->sb_flags &= ~SB_RDONLY; +- +- mnt = fc_mount(fc); +- if (IS_ERR(mnt)) +- return mnt; ++ int ret = 0; + +- if (!ro2rw) +- return mnt; ++ if (fc->sb_flags & SB_RDONLY) ++ return ret; + +- /* We need to convert to rw, call reconfigure. */ +- fc->sb_flags &= ~SB_RDONLY; + down_write(&mnt->mnt_sb->s_umount); +- ret = btrfs_reconfigure(fc); ++ if (!(fc->sb_flags & SB_RDONLY) && (mnt->mnt_sb->s_flags & SB_RDONLY)) ++ ret = btrfs_reconfigure(fc); + up_write(&mnt->mnt_sb->s_umount); +- if (ret) { +- mntput(mnt); +- return ERR_PTR(ret); +- } +- return mnt; ++ return ret; + } + + static int btrfs_get_tree_subvol(struct fs_context *fc) +@@ -2025,6 +2007,7 @@ static int btrfs_get_tree_subvol(struct fs_context *fc) + struct fs_context *dup_fc; + struct dentry *dentry; + struct vfsmount *mnt; ++ int ret = 0; + + /* + * Setup a dummy root and fs_info for test/set super. 
This is because +@@ -2067,11 +2050,16 @@ static int btrfs_get_tree_subvol(struct fs_context *fc) + fc->security = NULL; + + mnt = fc_mount(dup_fc); +- if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) +- mnt = btrfs_reconfigure_for_mount(dup_fc); +- put_fs_context(dup_fc); +- if (IS_ERR(mnt)) ++ if (IS_ERR(mnt)) { ++ put_fs_context(dup_fc); + return PTR_ERR(mnt); ++ } ++ ret = btrfs_reconfigure_for_mount(dup_fc, mnt); ++ put_fs_context(dup_fc); ++ if (ret) { ++ mntput(mnt); ++ return ret; ++ } + + /* + * This free's ->subvol_name, because if it isn't set we have to +-- +2.43.0 + diff --git a/queue-6.12/clk-en7523-initialize-num-before-accessing-hws-in-en.patch b/queue-6.12/clk-en7523-initialize-num-before-accessing-hws-in-en.patch new file mode 100644 index 00000000000..b7e58924025 --- /dev/null +++ b/queue-6.12/clk-en7523-initialize-num-before-accessing-hws-in-en.patch @@ -0,0 +1,49 @@ +From 77b84ce9293503e9c2d89a02b41a4cb180b7cf4f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Dec 2024 22:29:15 +0800 +Subject: clk: en7523: Initialize num before accessing hws in + en7523_register_clocks() + +From: Haoyu Li + +[ Upstream commit 52fd1709e41d3a85b48bcfe2404a024ebaf30c3b ] + +With the new __counted_by annotation in clk_hw_onecell_data, the "num" +struct member must be set before accessing the "hws" array. Failing to +do so will trigger a runtime warning when enabling CONFIG_UBSAN_BOUNDS +and CONFIG_FORTIFY_SOURCE. 
+ +Fixes: f316cdff8d67 ("clk: Annotate struct clk_hw_onecell_data with __counted_by") +Signed-off-by: Haoyu Li +Link: https://lore.kernel.org/r/20241203142915.345523-1-lihaoyu499@gmail.com +Signed-off-by: Stephen Boyd +Signed-off-by: Sasha Levin +--- + drivers/clk/clk-en7523.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/clk/clk-en7523.c b/drivers/clk/clk-en7523.c +index fdd8ea989ed24..bc21b29214492 100644 +--- a/drivers/clk/clk-en7523.c ++++ b/drivers/clk/clk-en7523.c +@@ -508,6 +508,8 @@ static void en7523_register_clocks(struct device *dev, struct clk_hw_onecell_dat + u32 rate; + int i; + ++ clk_data->num = EN7523_NUM_CLOCKS; ++ + for (i = 0; i < ARRAY_SIZE(en7523_base_clks); i++) { + const struct en_clk_desc *desc = &en7523_base_clks[i]; + u32 reg = desc->div_reg ? desc->div_reg : desc->base_reg; +@@ -529,8 +531,6 @@ static void en7523_register_clocks(struct device *dev, struct clk_hw_onecell_dat + + hw = en7523_register_pcie_clk(dev, np_base); + clk_data->hws[EN7523_CLK_PCIE] = hw; +- +- clk_data->num = EN7523_NUM_CLOCKS; + } + + static int en7523_clk_hw_init(struct platform_device *pdev, +-- +2.43.0 + diff --git a/queue-6.12/sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch b/queue-6.12/sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch new file mode 100644 index 00000000000..dd77078f621 --- /dev/null +++ b/queue-6.12/sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch @@ -0,0 +1,71 @@ +From 32a22393f00b0c0e39edcd0ba37589dc73122007 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Nov 2024 05:44:32 +0000 +Subject: sched/core: Prevent wakeup of ksoftirqd during idle load balance + +From: K Prateek Nayak + +[ Upstream commit e932c4ab38f072ce5894b2851fea8bc5754bb8e5 ] + +Scheduler raises a SCHED_SOFTIRQ to trigger a load balancing event on +from the IPI handler on the idle CPU. 
If the SMP function is invoked +from an idle CPU via flush_smp_call_function_queue() then the HARD-IRQ +flag is not set and raise_softirq_irqoff() needlessly wakes ksoftirqd +because soft interrupts are handled before ksoftirqd gets on the CPU. + +Adding a trace_printk() in nohz_csd_func() at the spot of raising +SCHED_SOFTIRQ and enabling trace events for sched_switch, sched_wakeup, +and softirq_entry (for SCHED_SOFTIRQ vector alone) helps observe the +current behavior: + + -0 [000] dN.1.: nohz_csd_func: Raising SCHED_SOFTIRQ from nohz_csd_func + -0 [000] dN.4.: sched_wakeup: comm=ksoftirqd/0 pid=16 prio=120 target_cpu=000 + -0 [000] .Ns1.: softirq_entry: vec=7 [action=SCHED] + -0 [000] .Ns1.: softirq_exit: vec=7 [action=SCHED] + -0 [000] d..2.: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=ksoftirqd/0 next_pid=16 next_prio=120 + ksoftirqd/0-16 [000] d..2.: sched_switch: prev_comm=ksoftirqd/0 prev_pid=16 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120 + ... + +Use __raise_softirq_irqoff() to raise the softirq. The SMP function call +is always invoked on the requested CPU in an interrupt handler. It is +guaranteed that soft interrupts are handled at the end. + +Following are the observations with the changes when enabling the same +set of events: + + -0 [000] dN.1.: nohz_csd_func: Raising SCHED_SOFTIRQ for nohz_idle_balance + -0 [000] dN.1.: softirq_raise: vec=7 [action=SCHED] + -0 [000] .Ns1.: softirq_entry: vec=7 [action=SCHED] + +No unnecessary ksoftirqd wakeups are seen from idle task's context to +service the softirq. 
+ +Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()") +Closes: https://lore.kernel.org/lkml/fcf823f-195e-6c9a-eac3-25f870cb35ac@inria.fr/ [1] +Reported-by: Julia Lawall +Suggested-by: Sebastian Andrzej Siewior +Signed-off-by: K Prateek Nayak +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Sebastian Andrzej Siewior +Link: https://lore.kernel.org/r/20241119054432.6405-5-kprateek.nayak@amd.com +Signed-off-by: Sasha Levin +--- + kernel/sched/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 33bc43b223cba..6cc12777bb11a 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -1244,7 +1244,7 @@ static void nohz_csd_func(void *info) + rq->idle_balance = idle_cpu(cpu); + if (rq->idle_balance) { + rq->nohz_idle_balance = flags; +- raise_softirq_irqoff(SCHED_SOFTIRQ); ++ __raise_softirq_irqoff(SCHED_SOFTIRQ); + } + } + +-- +2.43.0 + diff --git a/queue-6.12/sched-core-remove-the-unnecessary-need_resched-check.patch b/queue-6.12/sched-core-remove-the-unnecessary-need_resched-check.patch new file mode 100644 index 00000000000..cf6c173f9d0 --- /dev/null +++ b/queue-6.12/sched-core-remove-the-unnecessary-need_resched-check.patch @@ -0,0 +1,122 @@ +From fa52fec40fd59740dc1236478cc120e29340d734 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Nov 2024 05:44:30 +0000 +Subject: sched/core: Remove the unnecessary need_resched() check in + nohz_csd_func() + +From: K Prateek Nayak + +[ Upstream commit ea9cffc0a154124821531991d5afdd7e8b20d7aa ] + +The need_resched() check currently in nohz_csd_func() can be tracked +to have been added in scheduler_ipi() back in 2011 via commit +ca38062e57e9 ("sched: Use resched IPI to kick off the nohz idle balance") + +Since then, it has travelled quite a bit but it seems like an idle_cpu() +check currently is sufficient to detect the need to bail out from an +idle load balancing. 
To justify this removal, consider all the following +cases where an idle load balancing could race with a task wakeup: + +o Since commit f3dd3f674555b ("sched: Remove the limitation of WF_ON_CPU + on wakelist if wakee cpu is idle") a target perceived to be idle + (target_rq->nr_running == 0) will return true for + ttwu_queue_cond(target) which will offload the task wakeup to the idle + target via an IPI. + + In all such cases target_rq->ttwu_pending will be set to 1 before + queuing the wake function. + + If an idle load balance races here, following scenarios are possible: + + - The CPU is not in TIF_POLLING_NRFLAG mode in which case an actual + IPI is sent to the CPU to wake it out of idle. If the + nohz_csd_func() queues before sched_ttwu_pending(), the idle load + balance will bail out since idle_cpu(target) returns 0 since + target_rq->ttwu_pending is 1. If the nohz_csd_func() is queued after + sched_ttwu_pending() it should see rq->nr_running to be non-zero and + bail out of idle load balancing. + + - The CPU is in TIF_POLLING_NRFLAG mode and instead of an actual IPI, + the sender will simply set TIF_NEED_RESCHED for the target to put it + out of idle and flush_smp_call_function_queue() in do_idle() will + execute the call function. Depending on the ordering of the queuing + of nohz_csd_func() and sched_ttwu_pending(), the idle_cpu() check in + nohz_csd_func() should either see target_rq->ttwu_pending = 1 or + target_rq->nr_running to be non-zero if there is a genuine task + wakeup racing with the idle load balance kick. + +o The waker CPU perceives the target CPU to be busy + (target_rq->nr_running != 0) but the CPU is in fact going idle and due + to a series of unfortunate events, the system reaches a case where the + waker CPU decides to perform the wakeup by itself in ttwu_queue() on + the target CPU but target is concurrently selected for idle load + balance (XXX: Can this happen? 
I'm not sure, but we'll consider the + mother of all coincidences to estimate the worst case scenario). + + ttwu_do_activate() calls enqueue_task() which would increment + "rq->nr_running" post which it calls wakeup_preempt() which is + responsible for setting TIF_NEED_RESCHED (via a resched IPI or by + setting TIF_NEED_RESCHED on a TIF_POLLING_NRFLAG idle CPU). The key + thing to note in this case is that rq->nr_running is already non-zero + in case of a wakeup before TIF_NEED_RESCHED is set which would + lead to idle_cpu() check returning false. + +In all cases, it seems that need_resched() check is unnecessary when +checking for idle_cpu() first since an impending wakeup racing with idle +load balancer will either set the "rq->ttwu_pending" or indicate a newly +woken task via "rq->nr_running". + +Chasing the reason why this check might have existed in the first place, +I came across Peter's suggestion on the first iteration of Suresh's +patch from 2011 [1] where the condition to raise the SCHED_SOFTIRQ was: + + sched_ttwu_do_pending(list); + + if (unlikely((rq->idle == current) && + rq->nohz_balance_kick && + !need_resched())) + raise_softirq_irqoff(SCHED_SOFTIRQ); + +Since the condition to raise the SCHED_SOFTIRQ was preceded by +sched_ttwu_do_pending() (which is equivalent of sched_ttwu_pending()) in +the current upstream kernel, the need_resched() check was necessary to +catch a newly queued task. Peter suggested modifying it to: + + if (idle_cpu() && rq->nohz_balance_kick && !need_resched()) + raise_softirq_irqoff(SCHED_SOFTIRQ); + +where idle_cpu() seems to have replaced "rq->idle == current" check. + +Even back then, the idle_cpu() check would have been sufficient to catch +a new task being enqueued. Since commit b2a02fc43a1f ("smp: Optimize +send_call_function_single_ipi()") overloads the interpretation of +TIF_NEED_RESCHED for TIF_POLLING_NRFLAG idling, remove the +need_resched() check in nohz_csd_func() to raise SCHED_SOFTIRQ based +on Peter's suggestion. 
+ +Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()") +Suggested-by: Peter Zijlstra +Signed-off-by: K Prateek Nayak +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20241119054432.6405-3-kprateek.nayak@amd.com +Signed-off-by: Sasha Levin +--- + kernel/sched/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 76b27b2a9c56a..33bc43b223cba 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -1242,7 +1242,7 @@ static void nohz_csd_func(void *info) + WARN_ON(!(flags & NOHZ_KICK_MASK)); + + rq->idle_balance = idle_cpu(cpu); +- if (rq->idle_balance && !need_resched()) { ++ if (rq->idle_balance) { + rq->nohz_idle_balance = flags; + raise_softirq_irqoff(SCHED_SOFTIRQ); + } +-- +2.43.0 + diff --git a/queue-6.12/sched-deadline-fix-warning-in-migrate_enable-for-boo.patch b/queue-6.12/sched-deadline-fix-warning-in-migrate_enable-for-boo.patch new file mode 100644 index 00000000000..db5d4b89d99 --- /dev/null +++ b/queue-6.12/sched-deadline-fix-warning-in-migrate_enable-for-boo.patch @@ -0,0 +1,80 @@ +From 42f31181cfa5c009dce7095563ac0f6f8563f83f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 24 Jul 2024 11:22:47 -0300 +Subject: sched/deadline: Fix warning in migrate_enable for boosted tasks + +From: Wander Lairson Costa + +[ Upstream commit 0664e2c311b9fa43b33e3e81429cd0c2d7f9c638 ] + +When running the following command: + +while true; do + stress-ng --cyclic 30 --timeout 30s --minimize --quiet +done + +a warning is eventually triggered: + +WARNING: CPU: 43 PID: 2848 at kernel/sched/deadline.c:794 +setup_new_dl_entity+0x13e/0x180 +... +Call Trace: + + ? show_trace_log_lvl+0x1c4/0x2df + ? enqueue_dl_entity+0x631/0x6e0 + ? setup_new_dl_entity+0x13e/0x180 + ? __warn+0x7e/0xd0 + ? report_bug+0x11a/0x1a0 + ? handle_bug+0x3c/0x70 + ? exc_invalid_op+0x14/0x70 + ? 
asm_exc_invalid_op+0x16/0x20 + enqueue_dl_entity+0x631/0x6e0 + enqueue_task_dl+0x7d/0x120 + __do_set_cpus_allowed+0xe3/0x280 + __set_cpus_allowed_ptr_locked+0x140/0x1d0 + __set_cpus_allowed_ptr+0x54/0xa0 + migrate_enable+0x7e/0x150 + rt_spin_unlock+0x1c/0x90 + group_send_sig_info+0xf7/0x1a0 + ? kill_pid_info+0x1f/0x1d0 + kill_pid_info+0x78/0x1d0 + kill_proc_info+0x5b/0x110 + __x64_sys_kill+0x93/0xc0 + do_syscall_64+0x5c/0xf0 + entry_SYSCALL_64_after_hwframe+0x6e/0x76 + RIP: 0033:0x7f0dab31f92b + +This warning occurs because set_cpus_allowed dequeues and enqueues tasks +with the ENQUEUE_RESTORE flag set. If the task is boosted, the warning +is triggered. A boosted task already had its parameters set by +rt_mutex_setprio, and a new call to setup_new_dl_entity is unnecessary, +hence the WARN_ON call. + +Check if we are requeueing a boosted task and avoid calling +setup_new_dl_entity if that's the case. + +Fixes: 295d6d5e3736 ("sched/deadline: Fix switching to -deadline") +Signed-off-by: Wander Lairson Costa +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Juri Lelli +Link: https://lore.kernel.org/r/20240724142253.27145-2-wander@redhat.com +Signed-off-by: Sasha Levin +--- + kernel/sched/deadline.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c +index be1b917dc8ce4..40a1ad4493b4d 100644 +--- a/kernel/sched/deadline.c ++++ b/kernel/sched/deadline.c +@@ -2042,6 +2042,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags) + } else if (flags & ENQUEUE_REPLENISH) { + replenish_dl_entity(dl_se); + } else if ((flags & ENQUEUE_RESTORE) && ++ !is_dl_boosted(dl_se) && + dl_time_before(dl_se->deadline, rq_clock(rq_of_dl_se(dl_se)))) { + setup_new_dl_entity(dl_se); + } +-- +2.43.0 + diff --git a/queue-6.12/sched-fair-check-idle_cpu-before-need_resched-to-det.patch b/queue-6.12/sched-fair-check-idle_cpu-before-need_resched-to-det.patch new file mode 100644 index 00000000000..91f44af5f03 --- /dev/null +++ 
b/queue-6.12/sched-fair-check-idle_cpu-before-need_resched-to-det.patch @@ -0,0 +1,60 @@ +From c847cd589507d709a386802f9fa52e36a1726972 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Nov 2024 05:44:31 +0000 +Subject: sched/fair: Check idle_cpu() before need_resched() to detect ilb CPU + turning busy + +From: K Prateek Nayak + +[ Upstream commit ff47a0acfcce309cf9e175149c75614491953c8f ] + +Commit b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()") +optimizes IPIs to idle CPUs in TIF_POLLING_NRFLAG mode by setting the +TIF_NEED_RESCHED flag in idle task's thread info and relying on +flush_smp_call_function_queue() in idle exit path to run the +call-function. A softirq raised by the call-function is handled shortly +after in do_softirq_post_smp_call_flush() but the TIF_NEED_RESCHED flag +remains set and is only cleared later when schedule_idle() calls +__schedule(). + +need_resched() check in _nohz_idle_balance() exists to bail out of load +balancing if another task has woken up on the CPU currently in-charge of +idle load balancing which is being processed in SCHED_SOFTIRQ context. +Since the optimization mentioned above overloads the interpretation of +TIF_NEED_RESCHED, check for idle_cpu() before going with the existing +need_resched() check which can catch a genuine task wakeup on an idle +CPU processing SCHED_SOFTIRQ from do_softirq_post_smp_call_flush(), as +well as the case where ksoftirqd needs to be preempted as a result of +new task wakeup or slice expiry. + +In case of PREEMPT_RT or threadirqs, although the idle load balancing +may be inhibited in some cases on the ilb CPU, the fact that ksoftirqd +is the only fair task going back to sleep will trigger a newidle balance +on the CPU which will alleviate some imbalance if it exists if idle +balance fails to do so. 
+ +Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()") +Signed-off-by: K Prateek Nayak +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20241119054432.6405-4-kprateek.nayak@amd.com +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 49fa456cd3320..782ce70ebd1b0 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -12580,7 +12580,7 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags) + * work being done for other CPUs. Next load + * balancing owner will pick it up. + */ +- if (need_resched()) { ++ if (!idle_cpu(this_cpu) && need_resched()) { + if (flags & NOHZ_STATS_KICK) + has_blocked_load = true; + if (flags & NOHZ_NEXT_KICK) +-- +2.43.0 + diff --git a/queue-6.12/sched-fix-warning-in-sched_setaffinity.patch b/queue-6.12/sched-fix-warning-in-sched_setaffinity.patch new file mode 100644 index 00000000000..5728b9b1d05 --- /dev/null +++ b/queue-6.12/sched-fix-warning-in-sched_setaffinity.patch @@ -0,0 +1,53 @@ +From 02671e16d4b0c6b003657a04c2c37546f4df4b17 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Nov 2024 10:27:38 -0800 +Subject: sched: fix warning in sched_setaffinity + +From: Josh Don + +[ Upstream commit 70ee7947a29029736a1a06c73a48ff37674a851b ] + +Commit 8f9ea86fdf99b added some logic to sched_setaffinity that included +a WARN when a per-task affinity assignment races with a cpuset update. + +Specifically, we can have a race where a cpuset update results in the +task affinity no longer being a subset of the cpuset. That's fine; we +have a fallback to instead use the cpuset mask. However, we have a WARN +set up that will trigger if the cpuset mask has no overlap at all with +the requested task affinity. This shouldn't be a warning condition; its +trivial to create this condition. 
+ +Reproduced the warning by the following setup: + +- $PID inside a cpuset cgroup +- another thread repeatedly switching the cpuset cpus from 1-2 to just 1 +- another thread repeatedly setting the $PID affinity (via taskset) to 2 + +Fixes: 8f9ea86fdf99b ("sched: Always preserve the user requested cpumask") +Signed-off-by: Josh Don +Acked-and-tested-by: Vincent Guittot +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Waiman Long +Tested-by: Madadi Vineeth Reddy +Link: https://lkml.kernel.org/r/20241111182738.1832953-1-joshdon@google.com +Signed-off-by: Sasha Levin +--- + kernel/sched/syscalls.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c +index 24f9f90b6574e..1784ed1fb3fe5 100644 +--- a/kernel/sched/syscalls.c ++++ b/kernel/sched/syscalls.c +@@ -1238,7 +1238,7 @@ int __sched_setaffinity(struct task_struct *p, struct affinity_context *ctx) + bool empty = !cpumask_and(new_mask, new_mask, + ctx->user_mask); + +- if (WARN_ON_ONCE(empty)) ++ if (empty) + cpumask_copy(new_mask, cpus_allowed); + } + __set_cpus_allowed_ptr(p, ctx); +-- +2.43.0 + diff --git a/queue-6.12/series b/queue-6.12/series index 1b9c6e01f07..89d1ae1d251 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -438,3 +438,15 @@ drm-xe-forcewake-add-a-helper-xe_force_wake_ref_has_.patch drm-xe-devcoredump-update-handling-of-xe_force_wake_.patch drm-amd-display-update-interface-to-check-uclk-dpm.patch drm-amd-display-add-option-to-retrieve-detile-buffer.patch +sched-fix-warning-in-sched_setaffinity.patch +sched-core-remove-the-unnecessary-need_resched-check.patch +sched-fair-check-idle_cpu-before-need_resched-to-det.patch +sched-core-prevent-wakeup-of-ksoftirqd-during-idle-l.patch +sched-deadline-fix-warning-in-migrate_enable-for-boo.patch +btrfs-drop-unused-parameter-options-from-open_ctree.patch +btrfs-drop-unused-parameter-data-from-btrfs_fill_sup.patch +btrfs-fix-mount-failure-due-to-remount-races.patch 
+btrfs-fix-missing-snapshot-drew-unlock-when-root-is-.patch +clk-en7523-initialize-num-before-accessing-hws-in-en.patch +tracing-eprobe-fix-to-release-eprobe-when-failed-to-.patch +x86-fix-build-regression-with-config_kexec_jump-enab.patch diff --git a/queue-6.12/tracing-eprobe-fix-to-release-eprobe-when-failed-to-.patch b/queue-6.12/tracing-eprobe-fix-to-release-eprobe-when-failed-to-.patch new file mode 100644 index 00000000000..cf76db35184 --- /dev/null +++ b/queue-6.12/tracing-eprobe-fix-to-release-eprobe-when-failed-to-.patch @@ -0,0 +1,40 @@ +From 0066b9a4538d3ac8c1fb6e46416acbaa35a188da Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 30 Nov 2024 01:47:47 +0900 +Subject: tracing/eprobe: Fix to release eprobe when failed to add dyn_event + +From: Masami Hiramatsu (Google) + +[ Upstream commit 494b332064c0ce2f7392fa92632bc50191c1b517 ] + +Fix eprobe event to unregister event call and release eprobe when it fails +to add dynamic event correctly. + +Link: https://lore.kernel.org/all/173289886698.73724.1959899350183686006.stgit@devnote2/ + +Fixes: 7491e2c44278 ("tracing: Add a probe that attaches to trace events") +Signed-off-by: Masami Hiramatsu (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace_eprobe.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c +index ebda68ee9abff..be8be0c1aaf0f 100644 +--- a/kernel/trace/trace_eprobe.c ++++ b/kernel/trace/trace_eprobe.c +@@ -963,6 +963,11 @@ static int __trace_eprobe_create(int argc, const char *argv[]) + goto error; + } + ret = dyn_event_add(&ep->devent, &ep->tp.event->call); ++ if (ret < 0) { ++ trace_probe_unregister_event_call(&ep->tp); ++ mutex_unlock(&event_mutex); ++ goto error; ++ } + mutex_unlock(&event_mutex); + return ret; + parse_error: +-- +2.43.0 + diff --git a/queue-6.12/x86-fix-build-regression-with-config_kexec_jump-enab.patch b/queue-6.12/x86-fix-build-regression-with-config_kexec_jump-enab.patch new file mode 
100644 index 00000000000..b9104083693 --- /dev/null +++ b/queue-6.12/x86-fix-build-regression-with-config_kexec_jump-enab.patch @@ -0,0 +1,50 @@ +From 90d46a03b930ea01b123f54a9fcc8d209f8b0329 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 Dec 2024 08:53:32 +0900 +Subject: x86: Fix build regression with CONFIG_KEXEC_JUMP enabled + +From: Damien Le Moal + +[ Upstream commit aeb68937614f4aeceaaa762bd7f0212ce842b797 ] + +Build 6.13-rc12 for x86_64 with gcc 14.2.1 fails with the error: + + ld: vmlinux.o: in function `virtual_mapped': + linux/arch/x86/kernel/relocate_kernel_64.S:249:(.text+0x5915b): undefined reference to `saved_context_gdt_desc' + +when CONFIG_KEXEC_JUMP is enabled. + +This was introduced by commit 07fa619f2a40 ("x86/kexec: Restore GDT on +return from ::preserve_context kexec") which introduced a use of +saved_context_gdt_desc without a declaration for it. + +Fix that by including asm/asm-offsets.h where saved_context_gdt_desc +is defined (indirectly in include/generated/asm-offsets.h which +asm/asm-offsets.h includes). + +Fixes: 07fa619f2a40 ("x86/kexec: Restore GDT on return from ::preserve_context kexec") +Signed-off-by: Damien Le Moal +Acked-by: Borislav Petkov (AMD) +Acked-by: David Woodhouse +Closes: https://lore.kernel.org/oe-kbuild-all/202411270006.ZyyzpYf8-lkp@intel.com/ +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + arch/x86/kernel/relocate_kernel_64.S | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S +index 1236f25fc8d12..540443d699e3c 100644 +--- a/arch/x86/kernel/relocate_kernel_64.S ++++ b/arch/x86/kernel/relocate_kernel_64.S +@@ -13,6 +13,7 @@ + #include <asm/pgtable_types.h> + #include <asm/nospec-branch.h> + #include <asm/unwind_hints.h> ++#include <asm/asm-offsets.h> + + /* + * Must be relocatable PIC code callable as a C function, in particular +-- +2.43.0 +