From: Greg Kroah-Hartman Date: Sun, 26 Oct 2025 14:52:10 +0000 (+0100) Subject: 6.17-stable patches X-Git-Tag: v5.4.301~35 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=17d1e611211ff8669caa6eb31b45777b73698260;p=thirdparty%2Fkernel%2Fstable-queue.git 6.17-stable patches added patches: acpica-work-around-bogus-wstringop-overread-warning-since-gcc-11.patch arch_topology-fix-incorrect-error-check-in-topology_parse_cpu_capacity.patch arm64-mte-do-not-warn-if-the-page-is-already-tagged-in-copy_highpage.patch btrfs-directly-free-partially-initialized-fs_info-in-btrfs_check_leaked_roots.patch btrfs-ref-verify-fix-is_err-vs-null-check-in-btrfs_build_ref_tree.patch btrfs-send-fix-duplicated-rmdir-operations-when-using-extrefs.patch can-netlink-can_changelink-allow-disabling-of-automatic-restart.patch cifs-fix-tcp_server_info-credits-to-be-signed.patch devcoredump-fix-circular-locking-dependency-with-devcd-mutex.patch dma-debug-don-t-report-false-positives-with-dma_bounce_unaligned_kmalloc.patch drm-amd-display-increase-max-link-count-and-fix-link-enc-null-pointer-access.patch drm-xe-check-return-value-of-ggtt-workqueue-allocation.patch fs-notify-call-exportfs_encode_fid-with-s_umount.patch gpio-104-idio-16-define-maximum-valid-register-address-offset.patch gpio-pci-idio-16-define-maximum-valid-register-address-offset.patch hung_task-fix-warnings-caused-by-unaligned-lock-pointers.patch hwmon-pmbus-max34440-update-adpm12160-coeff-due-to-latest-fw.patch io_uring-sqpoll-be-smarter-on-when-to-update-the-stime-usage.patch io_uring-sqpoll-switch-away-from-getrusage-for-cpu-accounting.patch mips-malta-fix-keyboard-resource-preventing-i8042-driver-from-registering.patch mm-damon-core-fix-list_add_tail-call-on-damon_call.patch mm-damon-core-fix-potential-memory-leak-by-cleaning-ops_filter-in-damon_destroy_scheme.patch mm-damon-core-use-damos_commit_quota_goal-for-new-goal-commit.patch mm-damon-sysfs-catch-commit-test-ctx-alloc-failure.patch mm-damon-sysfs-dealloc-commit-test-ctx-always.patch mm-don-t-spin-in-add_stack_record-when-gfp-flags-don-t-allow.patch mm-mremap-correctly-account-old-mapping-after-mremap_dontunmap-remap.patch mm-prevent-poison-consumption-when-splitting-thp.patch mptcp-pm-in-kernel-c-flag-handle-late-add_addr.patch net-bonding-fix-possible-peer-notify-event-loss-or-dup-issue.patch net-bonding-update-the-slave-array-for-broadcast-mode.patch net-ravb-enforce-descriptor-type-ordering.patch net-ravb-ensure-memory-write-completes-before-ringing-tx-doorbell.patch net-stmmac-dwmac-rk-fix-disabling-set_clock_selection.patch net-usb-rtl8150-fix-frame-padding.patch ocfs2-clear-extent-cache-after-moving-defragmenting-extents.patch platform-x86-alienware-wmi-wmax-add-awcc-support-to-dell-g15-5530.patch platform-x86-alienware-wmi-wmax-fix-null-pointer-dereference-in-sleep-handlers.patch revert-cpuidle-menu-avoid-discarding-useful-information.patch riscv-cpufeature-avoid-uninitialized-variable-in-has_thead_homogeneous_vlenb.patch riscv-hwprobe-fix-stale-vdso-data-for-late-initialized-keys-at-boot.patch rust-device-fix-device-context-of-device-parent.patch rv-fully-convert-enabled_monitors-to-use-list_head-as-iterator.patch rv-make-rtapp-pagefault-monitor-depends-on-config_mmu.patch selftests-mptcp-join-mark-delete-re-add-signal-as-skipped-if-not-supported.patch selftests-mptcp-join-mark-flush-re-add-as-skipped-if-not-supported.patch selftests-mptcp-join-mark-implicit-tests-as-skipped-if-not-supported.patch slab-avoid-race-on-slab-obj_exts-in-alloc_slab_obj_exts.patch 
slab-fix-obj_ext-mistakenly-considered-null-due-to-race-condition.patch smb-client-get-rid-of-d_drop-in-cifs_do_rename.patch virtio-net-zero-unused-hash-fields.patch vsock-fix-lock-inversion-in-vsock_assign_transport.patch xfs-fix-locking-in-xchk_nlinks_collect_dir.patch --- diff --git a/queue-6.17/acpica-work-around-bogus-wstringop-overread-warning-since-gcc-11.patch b/queue-6.17/acpica-work-around-bogus-wstringop-overread-warning-since-gcc-11.patch new file mode 100644 index 0000000000..40743db086 --- /dev/null +++ b/queue-6.17/acpica-work-around-bogus-wstringop-overread-warning-since-gcc-11.patch @@ -0,0 +1,52 @@ +From 6e3a4754717a74e931a9f00b5f953be708e07acb Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Tue, 21 Oct 2025 17:28:25 +0800 +Subject: ACPICA: Work around bogus -Wstringop-overread warning since GCC 11 + +From: Xi Ruoyao + +commit 6e3a4754717a74e931a9f00b5f953be708e07acb upstream. + +When ACPI_MISALIGNMENT_NOT_SUPPORTED is set, GCC can produce a bogus +-Wstringop-overread warning, see [1]. + +To me, it's very clear that we have a compiler bug here, thus just +disable the warning. + +Fixes: a9d13433fe17 ("LoongArch: Align ACPI structures if ARCH_STRICT_ALIGN enabled") +Link: https://lore.kernel.org/all/899f2dec-e8b9-44f4-ab8d-001e160a2aed@roeck-us.net/ +Link: https://github.com/acpica/acpica/commit/abf5b573 +Link: https://gcc.gnu.org/PR122073 [1] +Co-developed-by: Saket Dumbre +Signed-off-by: Saket Dumbre +Signed-off-by: Xi Ruoyao +Acked-by: Huacai Chen +Cc: All applicable +[ rjw: Subject and changelog edits ] +Link: https://patch.msgid.link/20251021092825.822007-1-xry111@xry111.site +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/acpica/tbprint.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/acpi/acpica/tbprint.c ++++ b/drivers/acpi/acpica/tbprint.c +@@ -95,6 +95,11 @@ acpi_tb_print_table_header(acpi_physical + { + struct acpi_table_header local_header; + ++#pragma GCC diagnostic push ++#if defined(__GNUC__) && __GNUC__ >= 11 ++#pragma GCC diagnostic ignored "-Wstringop-overread" ++#endif ++ + if (ACPI_COMPARE_NAMESEG(header->signature, ACPI_SIG_FACS)) { + + /* FACS only has signature and length fields */ +@@ -135,4 +140,5 @@ acpi_tb_print_table_header(acpi_physical + local_header.asl_compiler_id, + local_header.asl_compiler_revision)); + } ++#pragma GCC diagnostic pop + } diff --git a/queue-6.17/arch_topology-fix-incorrect-error-check-in-topology_parse_cpu_capacity.patch b/queue-6.17/arch_topology-fix-incorrect-error-check-in-topology_parse_cpu_capacity.patch new file mode 100644 index 0000000000..0e17ef6a4f --- /dev/null +++ b/queue-6.17/arch_topology-fix-incorrect-error-check-in-topology_parse_cpu_capacity.patch @@ -0,0 +1,48 @@ +From 2eead19334516c8e9927c11b448fbe512b1f18a1 Mon Sep 17 00:00:00 2001 +From: Kaushlendra Kumar +Date: Tue, 23 Sep 2025 23:13:08 +0530 +Subject: arch_topology: Fix incorrect error check in topology_parse_cpu_capacity() + +From: Kaushlendra Kumar + +commit 2eead19334516c8e9927c11b448fbe512b1f18a1 upstream. + +Fix incorrect use of PTR_ERR_OR_ZERO() in topology_parse_cpu_capacity() +which causes the code to proceed with NULL clock pointers. The current +logic uses !PTR_ERR_OR_ZERO(cpu_clk) which evaluates to true for both +valid pointers and NULL, leading to potential NULL pointer dereference +in clk_get_rate(). + +Per include/linux/err.h documentation, PTR_ERR_OR_ZERO(ptr) returns: +"The error code within @ptr if it is an error pointer; 0 otherwise." 
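To make those semantics concrete, here is a minimal sketch of the two
predicates. These are simplified restatements of the include/linux/err.h
definitions, not the kernel code verbatim:

	#include <stdbool.h>

	#define MAX_ERRNO	4095
	#define IS_ERR_VALUE(x)	((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

	/* Simplified: returns the errno if ptr encodes one, else 0. */
	static inline long PTR_ERR_OR_ZERO(const void *ptr)
	{
		return IS_ERR_VALUE((unsigned long)ptr) ? (long)ptr : 0;
	}

	/* Simplified: true for both NULL and ERR_PTR() values. */
	static inline bool IS_ERR_OR_NULL(const void *ptr)
	{
		return !ptr || IS_ERR_VALUE((unsigned long)ptr);
	}

	/*
	 * ptr value           !PTR_ERR_OR_ZERO(ptr)   !IS_ERR_OR_NULL(ptr)
	 * valid pointer       true  (proceed)         true  (proceed)
	 * NULL                true  (proceed!)        false (skip)
	 * ERR_PTR(-ENOENT)    false (skip)            false (skip)
	 */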
+ +This means PTR_ERR_OR_ZERO() returns 0 for both valid pointers AND NULL +pointers. Therefore !PTR_ERR_OR_ZERO(cpu_clk) evaluates to true (proceed) +when cpu_clk is either valid or NULL, causing clk_get_rate(NULL) to be +called when of_clk_get() returns NULL. + +Replace with !IS_ERR_OR_NULL(cpu_clk) which only proceeds for valid +pointers, preventing potential NULL pointer dereference in clk_get_rate(). + +Cc: stable +Signed-off-by: Kaushlendra Kumar +Reviewed-by: Sudeep Holla +Fixes: b8fe128dad8f ("arch_topology: Adjust initial CPU capacities with current freq") +Link: https://patch.msgid.link/20250923174308.1771906-1-kaushlendra.kumar@intel.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/arch_topology.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/base/arch_topology.c ++++ b/drivers/base/arch_topology.c +@@ -292,7 +292,7 @@ bool __init topology_parse_cpu_capacity( + * frequency (by keeping the initial capacity_freq_ref value). + */ + cpu_clk = of_clk_get(cpu_node, 0); +- if (!PTR_ERR_OR_ZERO(cpu_clk)) { ++ if (!IS_ERR_OR_NULL(cpu_clk)) { + per_cpu(capacity_freq_ref, cpu) = + clk_get_rate(cpu_clk) / HZ_PER_KHZ; + clk_put(cpu_clk); diff --git a/queue-6.17/arm64-mte-do-not-warn-if-the-page-is-already-tagged-in-copy_highpage.patch b/queue-6.17/arm64-mte-do-not-warn-if-the-page-is-already-tagged-in-copy_highpage.patch new file mode 100644 index 0000000000..5dd93ee083 --- /dev/null +++ b/queue-6.17/arm64-mte-do-not-warn-if-the-page-is-already-tagged-in-copy_highpage.patch @@ -0,0 +1,61 @@ +From b98c94eed4a975e0c80b7e90a649a46967376f58 Mon Sep 17 00:00:00 2001 +From: Catalin Marinas +Date: Wed, 22 Oct 2025 11:09:14 +0100 +Subject: arm64: mte: Do not warn if the page is already tagged in copy_highpage() + +From: Catalin Marinas + +commit b98c94eed4a975e0c80b7e90a649a46967376f58 upstream. + +The arm64 copy_highpage() assumes that the destination page is newly +allocated and not MTE-tagged (PG_mte_tagged unset) and warns +accordingly. However, following commit 060913999d7a ("mm: migrate: +support poisoned recover from migrate folio"), folio_mc_copy() is called +before __folio_migrate_mapping(). If the latter fails (-EAGAIN), the +copy will be done again to the same destination page. Since +copy_highpage() already set the PG_mte_tagged flag, this second copy +will warn. + +Replace the WARN_ON_ONCE(page already tagged) in the arm64 +copy_highpage() with a comment. + +Reported-by: syzbot+d1974fc28545a3e6218b@syzkaller.appspotmail.com +Link: https://lore.kernel.org/r/68dda1ae.a00a0220.102ee.0065.GAE@google.com +Reviewed-by: David Hildenbrand +Cc: Will Deacon +Cc: Kefeng Wang +Cc: stable@vger.kernel.org # 6.12.x +Reviewed-by: Yang Shi +Signed-off-by: Catalin Marinas +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/mm/copypage.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/arch/arm64/mm/copypage.c ++++ b/arch/arm64/mm/copypage.c +@@ -35,7 +35,7 @@ void copy_highpage(struct page *to, stru + from != folio_page(src, 0)) + return; + +- WARN_ON_ONCE(!folio_try_hugetlb_mte_tagging(dst)); ++ folio_try_hugetlb_mte_tagging(dst); + + /* + * Populate tags for all subpages. +@@ -51,8 +51,13 @@ void copy_highpage(struct page *to, stru + } + folio_set_hugetlb_mte_tagged(dst); + } else if (page_mte_tagged(from)) { +- /* It's a new page, shouldn't have been tagged yet */ +- WARN_ON_ONCE(!try_page_mte_tagging(to)); ++ /* ++ * Most of the time it's a new page that shouldn't have been ++ * tagged yet. 
However, folio migration can end up reusing the ++ * same page without untagging it. Ignore the warning if the ++ * page is already tagged. ++ */ ++ try_page_mte_tagging(to); + + mte_copy_page_tags(kto, kfrom); + set_page_mte_tagged(to); diff --git a/queue-6.17/btrfs-directly-free-partially-initialized-fs_info-in-btrfs_check_leaked_roots.patch b/queue-6.17/btrfs-directly-free-partially-initialized-fs_info-in-btrfs_check_leaked_roots.patch new file mode 100644 index 0000000000..d59a654826 --- /dev/null +++ b/queue-6.17/btrfs-directly-free-partially-initialized-fs_info-in-btrfs_check_leaked_roots.patch @@ -0,0 +1,76 @@ +From 17679ac6df6c4830ba711835aa8cf961be36cfa1 Mon Sep 17 00:00:00 2001 +From: Dewei Meng +Date: Thu, 16 Oct 2025 14:10:11 +0800 +Subject: btrfs: directly free partially initialized fs_info in btrfs_check_leaked_roots() + +From: Dewei Meng + +commit 17679ac6df6c4830ba711835aa8cf961be36cfa1 upstream. + +If fs_info->super_copy or fs_info->super_for_commit allocated failed in +btrfs_get_tree_subvol(), then no need to call btrfs_free_fs_info(). +Otherwise btrfs_check_leaked_roots() would access NULL pointer because +fs_info->allocated_roots had not been initialised. + +syzkaller reported the following information: + ------------[ cut here ]------------ + BUG: unable to handle page fault for address: fffffffffffffbb0 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 64c9067 P4D 64c9067 PUD 64cb067 PMD 0 + Oops: Oops: 0000 [#1] SMP KASAN PTI + CPU: 0 UID: 0 PID: 1402 Comm: syz.1.35 Not tainted 6.15.8 #4 PREEMPT(lazy) + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), (...) + RIP: 0010:arch_atomic_read arch/x86/include/asm/atomic.h:23 [inline] + RIP: 0010:raw_atomic_read include/linux/atomic/atomic-arch-fallback.h:457 [inline] + RIP: 0010:atomic_read include/linux/atomic/atomic-instrumented.h:33 [inline] + RIP: 0010:refcount_read include/linux/refcount.h:170 [inline] + RIP: 0010:btrfs_check_leaked_roots+0x18f/0x2c0 fs/btrfs/disk-io.c:1230 + [...] + Call Trace: + + btrfs_free_fs_info+0x310/0x410 fs/btrfs/disk-io.c:1280 + btrfs_get_tree_subvol+0x592/0x6b0 fs/btrfs/super.c:2029 + btrfs_get_tree+0x63/0x80 fs/btrfs/super.c:2097 + vfs_get_tree+0x98/0x320 fs/super.c:1759 + do_new_mount+0x357/0x660 fs/namespace.c:3899 + path_mount+0x716/0x19c0 fs/namespace.c:4226 + do_mount fs/namespace.c:4239 [inline] + __do_sys_mount fs/namespace.c:4450 [inline] + __se_sys_mount fs/namespace.c:4427 [inline] + __x64_sys_mount+0x28c/0x310 fs/namespace.c:4427 + do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] + do_syscall_64+0x92/0x180 arch/x86/entry/syscall_64.c:94 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + RIP: 0033:0x7f032eaffa8d + [...] + +Fixes: 3bb17a25bcb0 ("btrfs: add get_tree callback for new mount API") +CC: stable@vger.kernel.org # 6.12+ +Reviewed-by: Daniel Vacek +Reviewed-by: Qu Wenruo +Signed-off-by: Dewei Meng +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/super.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -2070,7 +2070,13 @@ static int btrfs_get_tree_subvol(struct + fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); + fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); + if (!fs_info->super_copy || !fs_info->super_for_commit) { +- btrfs_free_fs_info(fs_info); ++ /* ++ * Dont call btrfs_free_fs_info() to free it as it's still ++ * initialized partially. 
++ */ ++ kfree(fs_info->super_copy); ++ kfree(fs_info->super_for_commit); ++ kvfree(fs_info); + return -ENOMEM; + } + btrfs_init_fs_info(fs_info); diff --git a/queue-6.17/btrfs-ref-verify-fix-is_err-vs-null-check-in-btrfs_build_ref_tree.patch b/queue-6.17/btrfs-ref-verify-fix-is_err-vs-null-check-in-btrfs_build_ref_tree.patch new file mode 100644 index 0000000000..dcb567eba8 --- /dev/null +++ b/queue-6.17/btrfs-ref-verify-fix-is_err-vs-null-check-in-btrfs_build_ref_tree.patch @@ -0,0 +1,40 @@ +From ada7d45b568abe4f1fd9c53d66e05fbea300674b Mon Sep 17 00:00:00 2001 +From: Amit Dhingra +Date: Tue, 21 Oct 2025 07:07:20 -0500 +Subject: btrfs: ref-verify: fix IS_ERR() vs NULL check in btrfs_build_ref_tree() + +From: Amit Dhingra + +commit ada7d45b568abe4f1fd9c53d66e05fbea300674b upstream. + +btrfs_extent_root()/btrfs_global_root() does not return error pointers, +it returns NULL on error. + +Reported-by: Dan Carpenter +Link: https://lore.kernel.org/all/aNJfvxj0anEnk9Dm@stanley.mountain/ +Fixes : ed4e6b5d644c ("btrfs: ref-verify: handle damaged extent root tree") +CC: stable@vger.kernel.org # 6.17+ +Signed-off-by: Amit Dhingra +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ref-verify.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c +index de4cb0f3fbd0..e9224145d754 100644 +--- a/fs/btrfs/ref-verify.c ++++ b/fs/btrfs/ref-verify.c +@@ -982,7 +982,7 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) + + extent_root = btrfs_extent_root(fs_info, 0); + /* If the extent tree is damaged we cannot ignore it (IGNOREBADROOTS). */ +- if (IS_ERR(extent_root)) { ++ if (!extent_root) { + btrfs_warn(fs_info, "ref-verify: extent tree not available, disabling"); + btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); + return 0; +-- +2.51.1 + diff --git a/queue-6.17/btrfs-send-fix-duplicated-rmdir-operations-when-using-extrefs.patch b/queue-6.17/btrfs-send-fix-duplicated-rmdir-operations-when-using-extrefs.patch new file mode 100644 index 0000000000..59b177c592 --- /dev/null +++ b/queue-6.17/btrfs-send-fix-duplicated-rmdir-operations-when-using-extrefs.patch @@ -0,0 +1,221 @@ +From 1fabe43b4e1a97597ec5d5ffcd2b7cf96e654b8f Mon Sep 17 00:00:00 2001 +From: Ting-Chang Hou +Date: Thu, 16 Oct 2025 15:53:51 +0800 +Subject: btrfs: send: fix duplicated rmdir operations when using extrefs + +From: Ting-Chang Hou + +commit 1fabe43b4e1a97597ec5d5ffcd2b7cf96e654b8f upstream. + +Commit 29d6d30f5c8a ("Btrfs: send, don't send rmdir for same target +multiple times") has fixed an issue that a send stream contained a rmdir +operation for the same directory multiple times. After that fix we keep +track of the last directory for which we sent a rmdir operation and +compare with it before sending a rmdir for the parent inode of a deleted +hardlink we are processing. But there is still a corner case that in +between rmdir dir operations for the same inode we find deleted hardlinks +for other parent inodes, so tracking just the last inode for which we sent +a rmdir operation is not enough. + +Hardlinks of a file in the same directory are stored in the same INODE_REF +item, but if the number of hardlinks is too large and can not fit in a +leaf, we use INODE_EXTREF items to store them. The key of an INODE_EXTREF +item is (inode_id, INODE_EXTREF, hash[name, parent ino]), so between two +hardlinks for the same parent directory, we can find others for other +parent directories. 
For example for the reproducer below we get the +following (from a btrfs inspect-internal dump-tree output): + + item 0 key (259 INODE_EXTREF 2309449) itemoff 16257 itemsize 26 + index 6925 parent 257 namelen 8 name: foo.6923 + item 1 key (259 INODE_EXTREF 2311350) itemoff 16231 itemsize 26 + index 6588 parent 258 namelen 8 name: foo.6587 + item 2 key (259 INODE_EXTREF 2457395) itemoff 16205 itemsize 26 + index 6611 parent 257 namelen 8 name: foo.6609 + (...) + +So tracking the last directory's inode number does not work in this case +since we process a link for parent inode 257, then for 258 and then back +again for 257, and that second time we process a deleted link for 257 we +think we have not yet sent a rmdir operation. + +Fix this by using a rbtree to keep track of all the directories for which +we have already sent rmdir operations, and add those directories to the +'check_dirs' ref list in process_recorded_refs() only if the directory is +not yet in the rbtree, otherwise skip it since it means we have already +sent a rmdir operation for that directory. + +The following test script reproduces the problem: + + $ cat test.sh + #!/bin/bash + + DEV=/dev/sdi + MNT=/mnt/sdi + + mkfs.btrfs -f $DEV + mount $DEV $MNT + + mkdir $MNT/a $MNT/b + + echo 123 > $MNT/a/foo + for ((i = 1; i <= 1000; i++)); do + ln $MNT/a/foo $MNT/a/foo.$i + ln $MNT/a/foo $MNT/b/foo.$i + done + + btrfs subvolume snapshot -r $MNT $MNT/snap1 + btrfs send $MNT/snap1 -f /tmp/base.send + + rm -r $MNT/a $MNT/b + + btrfs subvolume snapshot -r $MNT $MNT/snap2 + btrfs send -p $MNT/snap1 $MNT/snap2 -f /tmp/incremental.send + + umount $MNT + mkfs.btrfs -f $DEV + mount $DEV $MNT + + btrfs receive $MNT -f /tmp/base.send + btrfs receive $MNT -f /tmp/incremental.send + + rm -f /tmp/base.send /tmp/incremental.send + + umount $MNT + +When running it, it fails like this: + + $ ./test.sh + (...) 
+ At subvol snap1 + At snapshot snap2 + ERROR: rmdir o257-9-0 failed: No such file or directory + +CC: +Reviewed-by: Filipe Manana +Signed-off-by: Ting-Chang Hou +[ Updated changelog ] +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/send.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 48 insertions(+), 8 deletions(-) + +--- a/fs/btrfs/send.c ++++ b/fs/btrfs/send.c +@@ -4148,6 +4148,48 @@ out: + return ret; + } + ++static int rbtree_check_dir_ref_comp(const void *k, const struct rb_node *node) ++{ ++ const struct recorded_ref *data = k; ++ const struct recorded_ref *ref = rb_entry(node, struct recorded_ref, node); ++ ++ if (data->dir > ref->dir) ++ return 1; ++ if (data->dir < ref->dir) ++ return -1; ++ if (data->dir_gen > ref->dir_gen) ++ return 1; ++ if (data->dir_gen < ref->dir_gen) ++ return -1; ++ return 0; ++} ++ ++static bool rbtree_check_dir_ref_less(struct rb_node *node, const struct rb_node *parent) ++{ ++ const struct recorded_ref *entry = rb_entry(node, struct recorded_ref, node); ++ ++ return rbtree_check_dir_ref_comp(entry, parent) < 0; ++} ++ ++static int record_check_dir_ref_in_tree(struct rb_root *root, ++ struct recorded_ref *ref, struct list_head *list) ++{ ++ struct recorded_ref *tmp_ref; ++ int ret; ++ ++ if (rb_find(ref, root, rbtree_check_dir_ref_comp)) ++ return 0; ++ ++ ret = dup_ref(ref, list); ++ if (ret < 0) ++ return ret; ++ ++ tmp_ref = list_last_entry(list, struct recorded_ref, list); ++ rb_add(&tmp_ref->node, root, rbtree_check_dir_ref_less); ++ tmp_ref->root = root; ++ return 0; ++} ++ + static int rename_current_inode(struct send_ctx *sctx, + struct fs_path *current_path, + struct fs_path *new_path) +@@ -4175,11 +4217,11 @@ static int process_recorded_refs(struct + struct recorded_ref *cur; + struct recorded_ref *cur2; + LIST_HEAD(check_dirs); ++ struct rb_root rbtree_check_dirs = RB_ROOT; + struct fs_path *valid_path = NULL; + u64 ow_inode = 0; + u64 ow_gen; + u64 ow_mode; +- u64 last_dir_ino_rm = 0; + bool did_overwrite = false; + bool is_orphan = false; + bool can_rename = true; +@@ -4483,7 +4525,7 @@ static int process_recorded_refs(struct + goto out; + } + } +- ret = dup_ref(cur, &check_dirs); ++ ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); + if (ret < 0) + goto out; + } +@@ -4511,7 +4553,7 @@ static int process_recorded_refs(struct + } + + list_for_each_entry(cur, &sctx->deleted_refs, list) { +- ret = dup_ref(cur, &check_dirs); ++ ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); + if (ret < 0) + goto out; + } +@@ -4521,7 +4563,7 @@ static int process_recorded_refs(struct + * We have a moved dir. 
Add the old parent to check_dirs + */ + cur = list_first_entry(&sctx->deleted_refs, struct recorded_ref, list); +- ret = dup_ref(cur, &check_dirs); ++ ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); + if (ret < 0) + goto out; + } else if (!S_ISDIR(sctx->cur_inode_mode)) { +@@ -4555,7 +4597,7 @@ static int process_recorded_refs(struct + if (is_current_inode_path(sctx, cur->full_path)) + fs_path_reset(&sctx->cur_inode_path); + } +- ret = dup_ref(cur, &check_dirs); ++ ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); + if (ret < 0) + goto out; + } +@@ -4598,8 +4640,7 @@ static int process_recorded_refs(struct + ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen); + if (ret < 0) + goto out; +- } else if (ret == inode_state_did_delete && +- cur->dir != last_dir_ino_rm) { ++ } else if (ret == inode_state_did_delete) { + ret = can_rmdir(sctx, cur->dir, cur->dir_gen); + if (ret < 0) + goto out; +@@ -4611,7 +4652,6 @@ static int process_recorded_refs(struct + ret = send_rmdir(sctx, valid_path); + if (ret < 0) + goto out; +- last_dir_ino_rm = cur->dir; + } + } + } diff --git a/queue-6.17/can-netlink-can_changelink-allow-disabling-of-automatic-restart.patch b/queue-6.17/can-netlink-can_changelink-allow-disabling-of-automatic-restart.patch new file mode 100644 index 0000000000..08efb24209 --- /dev/null +++ b/queue-6.17/can-netlink-can_changelink-allow-disabling-of-automatic-restart.patch @@ -0,0 +1,62 @@ +From 8e93ac51e4c6dc399fad59ec21f55f2cfb46d27c Mon Sep 17 00:00:00 2001 +From: Marc Kleine-Budde +Date: Mon, 20 Oct 2025 11:51:03 +0200 +Subject: can: netlink: can_changelink(): allow disabling of automatic restart + +From: Marc Kleine-Budde + +commit 8e93ac51e4c6dc399fad59ec21f55f2cfb46d27c upstream. + +Since the commit c1f3f9797c1f ("can: netlink: can_changelink(): fix NULL +pointer deref of struct can_priv::do_set_mode"), the automatic restart +delay can only be set for devices that implement the restart handler struct +can_priv::do_set_mode. As it makes no sense to configure a automatic +restart for devices that doesn't support it. + +However, since systemd commit 13ce5d4632e3 ("network/can: properly handle +CAN.RestartSec=0") [1], systemd-networkd correctly handles a restart delay +of "0" (i.e. the restart is disabled). Which means that a disabled restart +is always configured in the kernel. + +On systems with both changes active this causes that CAN interfaces that +don't implement a restart handler cannot be brought up by systemd-networkd. + +Solve this problem by allowing a delay of "0" to be configured, even if the +device does not implement a restart handler. 
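In iproute2 terms, the configuration that systemd-networkd issues boils
down to the following (interface name illustrative):

	# Disabled restart: now accepted even without a do_set_mode handler.
	ip link set can0 type can bitrate 500000 restart-ms 0

	# Non-zero delay: still rejected with -EOPNOTSUPP on such devices.
	ip link set can0 type can bitrate 500000 restart-ms 100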
+ +[1] https://github.com/systemd/systemd/commit/13ce5d4632e395521e6205c954493c7fc1c4c6e0 + +Cc: stable@vger.kernel.org +Cc: Andrei Lalaev +Reported-by: Marc Kleine-Budde +Closes: https://lore.kernel.org/all/20251020-certain-arrogant-vole-of-sunshine-141841-mkl@pengutronix.de +Fixes: c1f3f9797c1f ("can: netlink: can_changelink(): fix NULL pointer deref of struct can_priv::do_set_mode") +Link: https://patch.msgid.link/20251020-netlink-fix-restart-v1-1-3f53c7f8520b@pengutronix.de +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/can/dev/netlink.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/net/can/dev/netlink.c ++++ b/drivers/net/can/dev/netlink.c +@@ -285,7 +285,9 @@ static int can_changelink(struct net_dev + } + + if (data[IFLA_CAN_RESTART_MS]) { +- if (!priv->do_set_mode) { ++ unsigned int restart_ms = nla_get_u32(data[IFLA_CAN_RESTART_MS]); ++ ++ if (restart_ms != 0 && !priv->do_set_mode) { + NL_SET_ERR_MSG(extack, + "Device doesn't support restart from Bus Off"); + return -EOPNOTSUPP; +@@ -294,7 +296,7 @@ static int can_changelink(struct net_dev + /* Do not allow changing restart delay while running */ + if (dev->flags & IFF_UP) + return -EBUSY; +- priv->restart_ms = nla_get_u32(data[IFLA_CAN_RESTART_MS]); ++ priv->restart_ms = restart_ms; + } + + if (data[IFLA_CAN_RESTART]) { diff --git a/queue-6.17/cifs-fix-tcp_server_info-credits-to-be-signed.patch b/queue-6.17/cifs-fix-tcp_server_info-credits-to-be-signed.patch new file mode 100644 index 0000000000..9d735ac0d6 --- /dev/null +++ b/queue-6.17/cifs-fix-tcp_server_info-credits-to-be-signed.patch @@ -0,0 +1,37 @@ +From 5b2ff4873aeab972f919d5aea11c51393322bf58 Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Mon, 20 Oct 2025 09:40:02 +0100 +Subject: cifs: Fix TCP_Server_Info::credits to be signed + +From: David Howells + +commit 5b2ff4873aeab972f919d5aea11c51393322bf58 upstream. + +Fix TCP_Server_Info::credits to be signed, just as echo_credits and +oplock_credits are. This also fixes what ought to get at least a +compilation warning if not an outright error in *get_credits_field() as a +pointer to the unsigned server->credits field is passed back as a pointer +to a signed int. 
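Reduced to a standalone sketch (structure and function names abbreviated
here; the real ones live in fs/smb/client/), the mismatch looks like this:

	struct server_info {
		unsigned int credits;	/* old declaration in TCP_Server_Info */
	};

	/* The *get_credits_field() helpers hand this out as a signed counter. */
	static int *get_credits_field(struct server_info *server)
	{
		return (int *)&server->credits;	/* pointee types disagree */
	}

Once credits is declared as a plain int, the cast disappears and transient
negative credit counts (as already allowed for echo_credits and
oplock_credits) become representable.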
+ +Signed-off-by: David Howells +cc: linux-cifs@vger.kernel.org +Cc: stable@vger.kernel.org +Acked-by: Paulo Alcantara (Red Hat) +Acked-by: Pavel Shilovskiy +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/smb/client/cifsglob.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/smb/client/cifsglob.h ++++ b/fs/smb/client/cifsglob.h +@@ -740,7 +740,7 @@ struct TCP_Server_Info { + bool nosharesock; + bool tcp_nodelay; + bool terminate; +- unsigned int credits; /* send no more requests at once */ ++ int credits; /* send no more requests at once */ + unsigned int max_credits; /* can override large 32000 default at mnt */ + unsigned int in_flight; /* number of requests on the wire to server */ + unsigned int max_in_flight; /* max number of requests that were on wire */ diff --git a/queue-6.17/devcoredump-fix-circular-locking-dependency-with-devcd-mutex.patch b/queue-6.17/devcoredump-fix-circular-locking-dependency-with-devcd-mutex.patch new file mode 100644 index 0000000000..978870c78d --- /dev/null +++ b/queue-6.17/devcoredump-fix-circular-locking-dependency-with-devcd-mutex.patch @@ -0,0 +1,383 @@ +From a91c8096590bd7801a26454789f2992094fe36da Mon Sep 17 00:00:00 2001 +From: Maarten Lankhorst +Date: Wed, 23 Jul 2025 16:24:16 +0200 +Subject: devcoredump: Fix circular locking dependency with devcd->mutex. + +From: Maarten Lankhorst + +commit a91c8096590bd7801a26454789f2992094fe36da upstream. + +The original code causes a circular locking dependency found by lockdep. + +====================================================== +WARNING: possible circular locking dependency detected +6.16.0-rc6-lgci-xe-xe-pw-151626v3+ #1 Tainted: G S U +------------------------------------------------------ +xe_fault_inject/5091 is trying to acquire lock: +ffff888156815688 ((work_completion)(&(&devcd->del_wk)->work)){+.+.}-{0:0}, at: __flush_work+0x25d/0x660 + +but task is already holding lock: + +ffff888156815620 (&devcd->mutex){+.+.}-{3:3}, at: dev_coredump_put+0x3f/0xa0 +which lock already depends on the new lock. 
+the existing dependency chain (in reverse order) is: +-> #2 (&devcd->mutex){+.+.}-{3:3}: + mutex_lock_nested+0x4e/0xc0 + devcd_data_write+0x27/0x90 + sysfs_kf_bin_write+0x80/0xf0 + kernfs_fop_write_iter+0x169/0x220 + vfs_write+0x293/0x560 + ksys_write+0x72/0xf0 + __x64_sys_write+0x19/0x30 + x64_sys_call+0x2bf/0x2660 + do_syscall_64+0x93/0xb60 + entry_SYSCALL_64_after_hwframe+0x76/0x7e +-> #1 (kn->active#236){++++}-{0:0}: + kernfs_drain+0x1e2/0x200 + __kernfs_remove+0xae/0x400 + kernfs_remove_by_name_ns+0x5d/0xc0 + remove_files+0x54/0x70 + sysfs_remove_group+0x3d/0xa0 + sysfs_remove_groups+0x2e/0x60 + device_remove_attrs+0xc7/0x100 + device_del+0x15d/0x3b0 + devcd_del+0x19/0x30 + process_one_work+0x22b/0x6f0 + worker_thread+0x1e8/0x3d0 + kthread+0x11c/0x250 + ret_from_fork+0x26c/0x2e0 + ret_from_fork_asm+0x1a/0x30 +-> #0 ((work_completion)(&(&devcd->del_wk)->work)){+.+.}-{0:0}: + __lock_acquire+0x1661/0x2860 + lock_acquire+0xc4/0x2f0 + __flush_work+0x27a/0x660 + flush_delayed_work+0x5d/0xa0 + dev_coredump_put+0x63/0xa0 + xe_driver_devcoredump_fini+0x12/0x20 [xe] + devm_action_release+0x12/0x30 + release_nodes+0x3a/0x120 + devres_release_all+0x8a/0xd0 + device_unbind_cleanup+0x12/0x80 + device_release_driver_internal+0x23a/0x280 + device_driver_detach+0x14/0x20 + unbind_store+0xaf/0xc0 + drv_attr_store+0x21/0x50 + sysfs_kf_write+0x4a/0x80 + kernfs_fop_write_iter+0x169/0x220 + vfs_write+0x293/0x560 + ksys_write+0x72/0xf0 + __x64_sys_write+0x19/0x30 + x64_sys_call+0x2bf/0x2660 + do_syscall_64+0x93/0xb60 + entry_SYSCALL_64_after_hwframe+0x76/0x7e +other info that might help us debug this: +Chain exists of: (work_completion)(&(&devcd->del_wk)->work) --> kn->active#236 --> &devcd->mutex + Possible unsafe locking scenario: + CPU0 CPU1 + ---- ---- + lock(&devcd->mutex); + lock(kn->active#236); + lock(&devcd->mutex); + lock((work_completion)(&(&devcd->del_wk)->work)); + *** DEADLOCK *** +5 locks held by xe_fault_inject/5091: + #0: ffff8881129f9488 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x72/0xf0 + #1: ffff88810c755078 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x123/0x220 + #2: ffff8881054811a0 (&dev->mutex){....}-{3:3}, at: device_release_driver_internal+0x55/0x280 + #3: ffff888156815620 (&devcd->mutex){+.+.}-{3:3}, at: dev_coredump_put+0x3f/0xa0 + #4: ffffffff8359e020 (rcu_read_lock){....}-{1:2}, at: __flush_work+0x72/0x660 +stack backtrace: +CPU: 14 UID: 0 PID: 5091 Comm: xe_fault_inject Tainted: G S U 6.16.0-rc6-lgci-xe-xe-pw-151626v3+ #1 PREEMPT_{RT,(lazy)} +Tainted: [S]=CPU_OUT_OF_SPEC, [U]=USER +Hardware name: Micro-Star International Co., Ltd. MS-7D25/PRO Z690-A DDR4(MS-7D25), BIOS 1.10 12/13/2021 +Call Trace: + + dump_stack_lvl+0x91/0xf0 + dump_stack+0x10/0x20 + print_circular_bug+0x285/0x360 + check_noncircular+0x135/0x150 + ? register_lock_class+0x48/0x4a0 + __lock_acquire+0x1661/0x2860 + lock_acquire+0xc4/0x2f0 + ? __flush_work+0x25d/0x660 + ? mark_held_locks+0x46/0x90 + ? __flush_work+0x25d/0x660 + __flush_work+0x27a/0x660 + ? __flush_work+0x25d/0x660 + ? trace_hardirqs_on+0x1e/0xd0 + ? __pfx_wq_barrier_func+0x10/0x10 + flush_delayed_work+0x5d/0xa0 + dev_coredump_put+0x63/0xa0 + xe_driver_devcoredump_fini+0x12/0x20 [xe] + devm_action_release+0x12/0x30 + release_nodes+0x3a/0x120 + devres_release_all+0x8a/0xd0 + device_unbind_cleanup+0x12/0x80 + device_release_driver_internal+0x23a/0x280 + ? 
bus_find_device+0xa8/0xe0 + device_driver_detach+0x14/0x20 + unbind_store+0xaf/0xc0 + drv_attr_store+0x21/0x50 + sysfs_kf_write+0x4a/0x80 + kernfs_fop_write_iter+0x169/0x220 + vfs_write+0x293/0x560 + ksys_write+0x72/0xf0 + __x64_sys_write+0x19/0x30 + x64_sys_call+0x2bf/0x2660 + do_syscall_64+0x93/0xb60 + ? __f_unlock_pos+0x15/0x20 + ? __x64_sys_getdents64+0x9b/0x130 + ? __pfx_filldir64+0x10/0x10 + ? do_syscall_64+0x1a2/0xb60 + ? clear_bhb_loop+0x30/0x80 + ? clear_bhb_loop+0x30/0x80 + entry_SYSCALL_64_after_hwframe+0x76/0x7e +RIP: 0033:0x76e292edd574 +Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d d5 ea 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89 +RSP: 002b:00007fffe247a828 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 000076e292edd574 +RDX: 000000000000000c RSI: 00006267f6306063 RDI: 000000000000000b +RBP: 000000000000000c R08: 000076e292fc4b20 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000202 R12: 00006267f6306063 +R13: 000000000000000b R14: 00006267e6859c00 R15: 000076e29322a000 + +xe 0000:03:00.0: [drm] Xe device coredump has been deleted. + +Fixes: 01daccf74832 ("devcoredump : Serialize devcd_del work") +Cc: Mukesh Ojha +Cc: Greg Kroah-Hartman +Cc: Johannes Berg +Cc: Rafael J. Wysocki +Cc: Danilo Krummrich +Cc: linux-kernel@vger.kernel.org +Cc: stable@vger.kernel.org # v6.1+ +Signed-off-by: Maarten Lankhorst +Cc: Matthew Brost +Acked-by: Mukesh Ojha +Link: https://lore.kernel.org/r/20250723142416.1020423-1-dev@lankhorst.se +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/devcoredump.c | 136 +++++++++++++++++++++++++++------------------ + 1 file changed, 83 insertions(+), 53 deletions(-) + +--- a/drivers/base/devcoredump.c ++++ b/drivers/base/devcoredump.c +@@ -23,50 +23,46 @@ struct devcd_entry { + void *data; + size_t datalen; + /* +- * Here, mutex is required to serialize the calls to del_wk work between +- * user/kernel space which happens when devcd is added with device_add() +- * and that sends uevent to user space. User space reads the uevents, +- * and calls to devcd_data_write() which try to modify the work which is +- * not even initialized/queued from devcoredump. ++ * There are 2 races for which mutex is required. + * ++ * The first race is between device creation and userspace writing to ++ * schedule immediately destruction. + * ++ * This race is handled by arming the timer before device creation, but ++ * when device creation fails the timer still exists. + * +- * cpu0(X) cpu1(Y) ++ * To solve this, hold the mutex during device_add(), and set ++ * init_completed on success before releasing the mutex. + * +- * dev_coredump() uevent sent to user space +- * device_add() ======================> user space process Y reads the +- * uevents writes to devcd fd +- * which results into writes to ++ * That way the timer will never fire until device_add() is called, ++ * it will do nothing if init_completed is not set. The timer is also ++ * cancelled in that case. + * +- * devcd_data_write() +- * mod_delayed_work() +- * try_to_grab_pending() +- * timer_delete() +- * debug_assert_init() +- * INIT_DELAYED_WORK() +- * schedule_delayed_work() +- * +- * +- * Also, mutex alone would not be enough to avoid scheduling of +- * del_wk work after it get flush from a call to devcd_free() +- * mentioned as below. 
+- * +- * disabled_store() +- * devcd_free() +- * mutex_lock() devcd_data_write() +- * flush_delayed_work() +- * mutex_unlock() +- * mutex_lock() +- * mod_delayed_work() +- * mutex_unlock() +- * So, delete_work flag is required. ++ * The second race involves multiple parallel invocations of devcd_free(), ++ * add a deleted flag so only 1 can call the destructor. + */ + struct mutex mutex; +- bool delete_work; ++ bool init_completed, deleted; + struct module *owner; + ssize_t (*read)(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen); + void (*free)(void *data); ++ /* ++ * If nothing interferes and device_add() was returns success, ++ * del_wk will destroy the device after the timer fires. ++ * ++ * Multiple userspace processes can interfere in the working of the timer: ++ * - Writing to the coredump will reschedule the timer to run immediately, ++ * if still armed. ++ * ++ * This is handled by using "if (cancel_delayed_work()) { ++ * schedule_delayed_work() }", to prevent re-arming after having ++ * been previously fired. ++ * - Writing to /sys/class/devcoredump/disabled will destroy the ++ * coredump synchronously. ++ * This is handled by using disable_delayed_work_sync(), and then ++ * checking if deleted flag is set with &devcd->mutex held. ++ */ + struct delayed_work del_wk; + struct device *failing_dev; + }; +@@ -95,14 +91,27 @@ static void devcd_dev_release(struct dev + kfree(devcd); + } + ++static void __devcd_del(struct devcd_entry *devcd) ++{ ++ devcd->deleted = true; ++ device_del(&devcd->devcd_dev); ++ put_device(&devcd->devcd_dev); ++} ++ + static void devcd_del(struct work_struct *wk) + { + struct devcd_entry *devcd; ++ bool init_completed; + + devcd = container_of(wk, struct devcd_entry, del_wk.work); + +- device_del(&devcd->devcd_dev); +- put_device(&devcd->devcd_dev); ++ /* devcd->mutex serializes against dev_coredumpm_timeout */ ++ mutex_lock(&devcd->mutex); ++ init_completed = devcd->init_completed; ++ mutex_unlock(&devcd->mutex); ++ ++ if (init_completed) ++ __devcd_del(devcd); + } + + static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj, +@@ -122,12 +131,12 @@ static ssize_t devcd_data_write(struct f + struct device *dev = kobj_to_dev(kobj); + struct devcd_entry *devcd = dev_to_devcd(dev); + +- mutex_lock(&devcd->mutex); +- if (!devcd->delete_work) { +- devcd->delete_work = true; +- mod_delayed_work(system_wq, &devcd->del_wk, 0); +- } +- mutex_unlock(&devcd->mutex); ++ /* ++ * Although it's tempting to use mod_delayed work here, ++ * that will cause a reschedule if the timer already fired. ++ */ ++ if (cancel_delayed_work(&devcd->del_wk)) ++ schedule_delayed_work(&devcd->del_wk, 0); + + return count; + } +@@ -151,11 +160,21 @@ static int devcd_free(struct device *dev + { + struct devcd_entry *devcd = dev_to_devcd(dev); + ++ /* ++ * To prevent a race with devcd_data_write(), disable work and ++ * complete manually instead. ++ * ++ * We cannot rely on the return value of ++ * disable_delayed_work_sync() here, because it might be in the ++ * middle of a cancel_delayed_work + schedule_delayed_work pair. ++ * ++ * devcd->mutex here guards against multiple parallel invocations ++ * of devcd_free(). 
++ */ ++ disable_delayed_work_sync(&devcd->del_wk); + mutex_lock(&devcd->mutex); +- if (!devcd->delete_work) +- devcd->delete_work = true; +- +- flush_delayed_work(&devcd->del_wk); ++ if (!devcd->deleted) ++ __devcd_del(devcd); + mutex_unlock(&devcd->mutex); + return 0; + } +@@ -179,12 +198,10 @@ static ssize_t disabled_show(const struc + * put_device() <- last reference + * error = fn(dev, data) devcd_dev_release() + * devcd_free(dev, data) kfree(devcd) +- * mutex_lock(&devcd->mutex); + * + * + * In the above diagram, it looks like disabled_store() would be racing with parallelly +- * running devcd_del() and result in memory abort while acquiring devcd->mutex which +- * is called after kfree of devcd memory after dropping its last reference with ++ * running devcd_del() and result in memory abort after dropping its last reference with + * put_device(). However, this will not happens as fn(dev, data) runs + * with its own reference to device via klist_node so it is not its last reference. + * so, above situation would not occur. +@@ -374,7 +391,7 @@ void dev_coredumpm_timeout(struct device + devcd->read = read; + devcd->free = free; + devcd->failing_dev = get_device(dev); +- devcd->delete_work = false; ++ devcd->deleted = false; + + mutex_init(&devcd->mutex); + device_initialize(&devcd->devcd_dev); +@@ -383,8 +400,14 @@ void dev_coredumpm_timeout(struct device + atomic_inc_return(&devcd_count)); + devcd->devcd_dev.class = &devcd_class; + +- mutex_lock(&devcd->mutex); + dev_set_uevent_suppress(&devcd->devcd_dev, true); ++ ++ /* devcd->mutex prevents devcd_del() completing until init finishes */ ++ mutex_lock(&devcd->mutex); ++ devcd->init_completed = false; ++ INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); ++ schedule_delayed_work(&devcd->del_wk, timeout); ++ + if (device_add(&devcd->devcd_dev)) + goto put_device; + +@@ -401,13 +424,20 @@ void dev_coredumpm_timeout(struct device + + dev_set_uevent_suppress(&devcd->devcd_dev, false); + kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); +- INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); +- schedule_delayed_work(&devcd->del_wk, timeout); ++ ++ /* ++ * Safe to run devcd_del() now that we are done with devcd_dev. ++ * Alternatively we could have taken a ref on devcd_dev before ++ * dropping the lock. ++ */ ++ devcd->init_completed = true; + mutex_unlock(&devcd->mutex); + return; + put_device: +- put_device(&devcd->devcd_dev); + mutex_unlock(&devcd->mutex); ++ cancel_delayed_work_sync(&devcd->del_wk); ++ put_device(&devcd->devcd_dev); ++ + put_module: + module_put(owner); + free: diff --git a/queue-6.17/dma-debug-don-t-report-false-positives-with-dma_bounce_unaligned_kmalloc.patch b/queue-6.17/dma-debug-don-t-report-false-positives-with-dma_bounce_unaligned_kmalloc.patch new file mode 100644 index 0000000000..570d2d3cdd --- /dev/null +++ b/queue-6.17/dma-debug-don-t-report-false-positives-with-dma_bounce_unaligned_kmalloc.patch @@ -0,0 +1,59 @@ +From 03521c892bb8d0712c23e158ae9bdf8705897df8 Mon Sep 17 00:00:00 2001 +From: Marek Szyprowski +Date: Thu, 9 Oct 2025 16:15:08 +0200 +Subject: dma-debug: don't report false positives with DMA_BOUNCE_UNALIGNED_KMALLOC + +From: Marek Szyprowski + +commit 03521c892bb8d0712c23e158ae9bdf8705897df8 upstream. + +Commit 370645f41e6e ("dma-mapping: force bouncing if the kmalloc() size is +not cache-line-aligned") introduced DMA_BOUNCE_UNALIGNED_KMALLOC feature +and permitted architecture specific code configure kmalloc slabs with +sizes smaller than the value of dma_get_cache_alignment(). 
+ +When that feature is enabled, the physical address of some small +kmalloc()-ed buffers might be not aligned to the CPU cachelines, thus not +really suitable for typical DMA. To properly handle that case a SWIOTLB +buffer bouncing is used, so no CPU cache corruption occurs. When that +happens, there is no point reporting a false-positive DMA-API warning that +the buffer is not properly aligned, as this is not a client driver fault. + +[m.szyprowski@samsung.com: replace is_swiotlb_allocated() with is_swiotlb_active(), per Catalin] + Link: https://lkml.kernel.org/r/20251010173009.3916215-1-m.szyprowski@samsung.com +Link: https://lkml.kernel.org/r/20251009141508.2342138-1-m.szyprowski@samsung.com +Fixes: 370645f41e6e ("dma-mapping: force bouncing if the kmalloc() size is not cache-line-aligned") +Signed-off-by: Marek Szyprowski +Reviewed-by: Catalin Marinas +Cc: Christoph Hellwig +Cc: Inki Dae +Cc: Robin Murohy +Cc: "Isaac J. Manjarres" +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + kernel/dma/debug.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/kernel/dma/debug.c ++++ b/kernel/dma/debug.c +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + #include + #include "debug.h" + +@@ -594,7 +595,9 @@ static void add_dma_entry(struct dma_deb + if (rc == -ENOMEM) { + pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n"); + global_disable = true; +- } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { ++ } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && ++ !(IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && ++ is_swiotlb_active(entry->dev))) { + err_printk(entry->dev, entry, + "cacheline tracking EEXIST, overlapping mappings aren't supported\n"); + } diff --git a/queue-6.17/drm-amd-display-increase-max-link-count-and-fix-link-enc-null-pointer-access.patch b/queue-6.17/drm-amd-display-increase-max-link-count-and-fix-link-enc-null-pointer-access.patch new file mode 100644 index 0000000000..210841e116 --- /dev/null +++ b/queue-6.17/drm-amd-display-increase-max-link-count-and-fix-link-enc-null-pointer-access.patch @@ -0,0 +1,60 @@ +From bec947cbe9a65783adb475a5fb47980d7b4f4796 Mon Sep 17 00:00:00 2001 +From: Charlene Liu +Date: Mon, 29 Sep 2025 20:29:30 -0400 +Subject: drm/amd/display: increase max link count and fix link->enc NULL pointer access + +From: Charlene Liu + +commit bec947cbe9a65783adb475a5fb47980d7b4f4796 upstream. + +[why] +1.) dc->links[MAX_LINKS] array size smaller than actual requested. +max_connector + max_dpia + 4 virtual = 14. +increase from 12 to 14. + +2.) hw_init() access null LINK_ENC for dpia non display_endpoint. 
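The hw_init() part of the fix is the usual skip-before-dereference guard;
in outline (loop bounds paraphrased from dcn401_init_hw(), not copied
verbatim):

	for (i = 0; i < dc->link_count; i++) {
		struct dc_link *link = dc->links[i];

		/* DPIA/virtual endpoints have no PHY link encoder to init. */
		if (link->ep_type != DISPLAY_ENDPOINT_PHY)
			continue;

		link->link_enc->funcs->hw_init(link->link_enc);
	}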
+ +Cc: Mario Limonciello +Cc: Alex Deucher +Reviewed-by: Meenakshikumar Somasundaram +Reviewed-by: Chris Park +Signed-off-by: Charlene Liu +Signed-off-by: Aurabindo Pillai +Signed-off-by: Alex Deucher +(cherry picked from commit d7f5a61e1b04ed87b008c8d327649d184dc5bb45) +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 3 +++ + drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h | 8 +++++++- + 2 files changed, 10 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c ++++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +@@ -200,6 +200,9 @@ void dcn401_init_hw(struct dc *dc) + */ + struct dc_link *link = dc->links[i]; + ++ if (link->ep_type != DISPLAY_ENDPOINT_PHY) ++ continue; ++ + link->link_enc->funcs->hw_init(link->link_enc); + + /* Check for enabled DIG to identify enabled display */ +--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h ++++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +@@ -44,7 +44,13 @@ + */ + #define MAX_PIPES 6 + #define MAX_PHANTOM_PIPES (MAX_PIPES / 2) +-#define MAX_LINKS (MAX_PIPES * 2 +2) ++ ++#define MAX_DPIA 6 ++#define MAX_CONNECTOR 6 ++#define MAX_VIRTUAL_LINKS 4 ++ ++#define MAX_LINKS (MAX_DPIA + MAX_CONNECTOR + MAX_VIRTUAL_LINKS) ++ + #define MAX_DIG_LINK_ENCODERS 7 + #define MAX_DWB_PIPES 1 + #define MAX_HPO_DP2_ENCODERS 4 diff --git a/queue-6.17/drm-xe-check-return-value-of-ggtt-workqueue-allocation.patch b/queue-6.17/drm-xe-check-return-value-of-ggtt-workqueue-allocation.patch new file mode 100644 index 0000000000..28557e42d0 --- /dev/null +++ b/queue-6.17/drm-xe-check-return-value-of-ggtt-workqueue-allocation.patch @@ -0,0 +1,37 @@ +From ce29214ada6d08dbde1eeb5a69c3b09ddf3da146 Mon Sep 17 00:00:00 2001 +From: Matthew Brost +Date: Tue, 21 Oct 2025 17:55:36 -0700 +Subject: drm/xe: Check return value of GGTT workqueue allocation + +From: Matthew Brost + +commit ce29214ada6d08dbde1eeb5a69c3b09ddf3da146 upstream. + +Workqueue allocation can fail, so check the return value of the GGTT +workqueue allocation and fail driver initialization if the allocation +fails. 
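The same guard applies to any driver-owned workqueue, since
alloc_workqueue() reports failure with NULL rather than an ERR_PTR()
(generic sketch with an illustrative name, not the xe code verbatim):

	struct workqueue_struct *wq;

	wq = alloc_workqueue("my-wq", 0, 0);
	if (!wq)
		return -ENOMEM;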
+ +Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") +Cc: stable@vger.kernel.org +Signed-off-by: Matthew Brost +Reviewed-by: Matthew Auld +Link: https://lore.kernel.org/r/20251022005538.828980-2-matthew.brost@intel.com +(cherry picked from commit 1f1314e8e71385bae319e43082b798c11f6648bc) +Signed-off-by: Lucas De Marchi +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/xe/xe_ggtt.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/gpu/drm/xe/xe_ggtt.c ++++ b/drivers/gpu/drm/xe/xe_ggtt.c +@@ -291,6 +291,9 @@ int xe_ggtt_init_early(struct xe_ggtt *g + ggtt->pt_ops = &xelp_pt_ops; + + ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM); ++ if (!ggtt->wq) ++ return -ENOMEM; ++ + __xe_ggtt_init_early(ggtt, xe_wopcm_size(xe)); + + err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt); diff --git a/queue-6.17/fs-notify-call-exportfs_encode_fid-with-s_umount.patch b/queue-6.17/fs-notify-call-exportfs_encode_fid-with-s_umount.patch new file mode 100644 index 0000000000..e3430719c9 --- /dev/null +++ b/queue-6.17/fs-notify-call-exportfs_encode_fid-with-s_umount.patch @@ -0,0 +1,106 @@ +From a7c4bb43bfdc2b9f06ee9d036028ed13a83df42a Mon Sep 17 00:00:00 2001 +From: Jakub Acs +Date: Wed, 1 Oct 2025 10:09:55 +0000 +Subject: fs/notify: call exportfs_encode_fid with s_umount + +From: Jakub Acs + +commit a7c4bb43bfdc2b9f06ee9d036028ed13a83df42a upstream. + +Calling intotify_show_fdinfo() on fd watching an overlayfs inode, while +the overlayfs is being unmounted, can lead to dereferencing NULL ptr. + +This issue was found by syzkaller. + +Race Condition Diagram: + +Thread 1 Thread 2 +-------- -------- + +generic_shutdown_super() + shrink_dcache_for_umount + sb->s_root = NULL + + | + | vfs_read() + | inotify_fdinfo() + | * inode get from mark * + | show_mark_fhandle(m, inode) + | exportfs_encode_fid(inode, ..) + | ovl_encode_fh(inode, ..) + | ovl_check_encode_origin(inode) + | * deref i_sb->s_root * + | + | + v + fsnotify_sb_delete(sb) + +Which then leads to: + +[ 32.133461] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI +[ 32.134438] KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] +[ 32.135032] CPU: 1 UID: 0 PID: 4468 Comm: systemd-coredum Not tainted 6.17.0-rc6 #22 PREEMPT(none) + + + +[ 32.143353] Call Trace: +[ 32.143732] ovl_encode_fh+0xd5/0x170 +[ 32.144031] exportfs_encode_inode_fh+0x12f/0x300 +[ 32.144425] show_mark_fhandle+0xbe/0x1f0 +[ 32.145805] inotify_fdinfo+0x226/0x2d0 +[ 32.146442] inotify_show_fdinfo+0x1c5/0x350 +[ 32.147168] seq_show+0x530/0x6f0 +[ 32.147449] seq_read_iter+0x503/0x12a0 +[ 32.148419] seq_read+0x31f/0x410 +[ 32.150714] vfs_read+0x1f0/0x9e0 +[ 32.152297] ksys_read+0x125/0x240 + +IOW ovl_check_encode_origin derefs inode->i_sb->s_root, after it was set +to NULL in the unmount path. + +Fix it by protecting calling exportfs_encode_fid() from +show_mark_fhandle() with s_umount lock. + +This form of fix was suggested by Amir in [1]. 
+ +[1]: https://lore.kernel.org/all/CAOQ4uxhbDwhb+2Brs1UdkoF0a3NSdBAOQPNfEHjahrgoKJpLEw@mail.gmail.com/ + +Fixes: c45beebfde34 ("ovl: support encoding fid from inode with no alias") +Signed-off-by: Jakub Acs +Cc: Jan Kara +Cc: Amir Goldstein +Cc: Miklos Szeredi +Cc: Christian Brauner +Cc: linux-unionfs@vger.kernel.org +Cc: linux-fsdevel@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Jan Kara +Signed-off-by: Greg Kroah-Hartman +--- + fs/notify/fdinfo.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/fs/notify/fdinfo.c ++++ b/fs/notify/fdinfo.c +@@ -17,6 +17,7 @@ + #include "fanotify/fanotify.h" + #include "fdinfo.h" + #include "fsnotify.h" ++#include "../internal.h" + + #if defined(CONFIG_PROC_FS) + +@@ -46,7 +47,12 @@ static void show_mark_fhandle(struct seq + + size = f->handle_bytes >> 2; + ++ if (!super_trylock_shared(inode->i_sb)) ++ return; ++ + ret = exportfs_encode_fid(inode, (struct fid *)f->f_handle, &size); ++ up_read(&inode->i_sb->s_umount); ++ + if ((ret == FILEID_INVALID) || (ret < 0)) + return; + diff --git a/queue-6.17/gpio-104-idio-16-define-maximum-valid-register-address-offset.patch b/queue-6.17/gpio-104-idio-16-define-maximum-valid-register-address-offset.patch new file mode 100644 index 0000000000..5c297bbaa5 --- /dev/null +++ b/queue-6.17/gpio-104-idio-16-define-maximum-valid-register-address-offset.patch @@ -0,0 +1,39 @@ +From c4d35e635f3a65aec291a6045cae8c99cede5bba Mon Sep 17 00:00:00 2001 +From: William Breathitt Gray +Date: Mon, 20 Oct 2025 17:51:44 +0900 +Subject: gpio: 104-idio-16: Define maximum valid register address offset + +From: William Breathitt Gray + +commit c4d35e635f3a65aec291a6045cae8c99cede5bba upstream. + +Attempting to load the 104-idio-16 module fails during regmap +initialization with a return error -EINVAL. This is a result of the +regmap cache failing initialization. Set the idio_16_regmap_config +max_register member to fix this failure. + +Fixes: 2c210c9a34a3 ("gpio: 104-idio-16: Migrate to the regmap API") +Reported-by: Mark Cave-Ayland +Closes: https://lore.kernel.org/r/9b0375fd-235f-4ee1-a7fa-daca296ef6bf@nutanix.com +Suggested-by: Mark Cave-Ayland +Cc: stable@vger.kernel.org +Reviewed-by: Andy Shevchenko +Signed-off-by: William Breathitt Gray +Reviewed-by: Linus Walleij +Link: https://lore.kernel.org/r/20251020-fix-gpio-idio-16-regmap-v2-1-ebeb50e93c33@kernel.org +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpio/gpio-104-idio-16.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpio/gpio-104-idio-16.c ++++ b/drivers/gpio/gpio-104-idio-16.c +@@ -59,6 +59,7 @@ static const struct regmap_config idio_1 + .reg_stride = 1, + .val_bits = 8, + .io_port = true, ++ .max_register = 0x5, + .wr_table = &idio_16_wr_table, + .rd_table = &idio_16_rd_table, + .volatile_table = &idio_16_rd_table, diff --git a/queue-6.17/gpio-pci-idio-16-define-maximum-valid-register-address-offset.patch b/queue-6.17/gpio-pci-idio-16-define-maximum-valid-register-address-offset.patch new file mode 100644 index 0000000000..a534bef424 --- /dev/null +++ b/queue-6.17/gpio-pci-idio-16-define-maximum-valid-register-address-offset.patch @@ -0,0 +1,39 @@ +From d37623132a6347b4ab9e2179eb3f2fa77863c364 Mon Sep 17 00:00:00 2001 +From: William Breathitt Gray +Date: Mon, 20 Oct 2025 17:51:45 +0900 +Subject: gpio: pci-idio-16: Define maximum valid register address offset + +From: William Breathitt Gray + +commit d37623132a6347b4ab9e2179eb3f2fa77863c364 upstream. 
+ +Attempting to load the pci-idio-16 module fails during regmap +initialization with a return error -EINVAL. This is a result of the +regmap cache failing initialization. Set the idio_16_regmap_config +max_register member to fix this failure. + +Fixes: 73d8f3efc5c2 ("gpio: pci-idio-16: Migrate to the regmap API") +Reported-by: Mark Cave-Ayland +Closes: https://lore.kernel.org/r/9b0375fd-235f-4ee1-a7fa-daca296ef6bf@nutanix.com +Suggested-by: Mark Cave-Ayland +Cc: stable@vger.kernel.org +Reviewed-by: Andy Shevchenko +Signed-off-by: William Breathitt Gray +Reviewed-by: Linus Walleij +Link: https://lore.kernel.org/r/20251020-fix-gpio-idio-16-regmap-v2-2-ebeb50e93c33@kernel.org +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpio/gpio-pci-idio-16.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpio/gpio-pci-idio-16.c ++++ b/drivers/gpio/gpio-pci-idio-16.c +@@ -41,6 +41,7 @@ static const struct regmap_config idio_1 + .reg_stride = 1, + .val_bits = 8, + .io_port = true, ++ .max_register = 0x7, + .wr_table = &idio_16_wr_table, + .rd_table = &idio_16_rd_table, + .volatile_table = &idio_16_rd_table, diff --git a/queue-6.17/hung_task-fix-warnings-caused-by-unaligned-lock-pointers.patch b/queue-6.17/hung_task-fix-warnings-caused-by-unaligned-lock-pointers.patch new file mode 100644 index 0000000000..56cf17c723 --- /dev/null +++ b/queue-6.17/hung_task-fix-warnings-caused-by-unaligned-lock-pointers.patch @@ -0,0 +1,89 @@ +From c97513cddcfc235f2522617980838e500af21d01 Mon Sep 17 00:00:00 2001 +From: Lance Yang +Date: Tue, 9 Sep 2025 22:52:43 +0800 +Subject: hung_task: fix warnings caused by unaligned lock pointers + +From: Lance Yang + +commit c97513cddcfc235f2522617980838e500af21d01 upstream. + +The blocker tracking mechanism assumes that lock pointers are at least +4-byte aligned to use their lower bits for type encoding. + +However, as reported by Eero Tamminen, some architectures like m68k +only guarantee 2-byte alignment of 32-bit values. This breaks the +assumption and causes two related WARN_ON_ONCE checks to trigger. + +To fix this, the runtime checks are adjusted to silently ignore any lock +that is not 4-byte aligned, effectively disabling the feature in such +cases and avoiding the related warnings. + +Thanks to Geert Uytterhoeven for bisecting! + +Link: https://lkml.kernel.org/r/20250909145243.17119-1-lance.yang@linux.dev +Fixes: e711faaafbe5 ("hung_task: replace blocker_mutex with encoded blocker") +Signed-off-by: Lance Yang +Reported-by: Eero Tamminen +Closes: https://lore.kernel.org/lkml/CAMuHMdW7Ab13DdGs2acMQcix5ObJK0O2dG_Fxzr8_g58Rc1_0g@mail.gmail.com +Reviewed-by: Masami Hiramatsu (Google) +Cc: John Paul Adrian Glaubitz +Cc: Anna Schumaker +Cc: Boqun Feng +Cc: Finn Thain +Cc: Geert Uytterhoeven +Cc: Ingo Molnar +Cc: Joel Granados +Cc: John Stultz +Cc: Kent Overstreet +Cc: Lance Yang +Cc: Mingzhe Yang +Cc: Peter Zijlstra +Cc: Sergey Senozhatsky +Cc: Steven Rostedt +Cc: Tomasz Figa +Cc: Waiman Long +Cc: Will Deacon +Cc: Yongliang Gao +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/hung_task.h | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h +index 34e615c76ca5..c4403eeb7144 100644 +--- a/include/linux/hung_task.h ++++ b/include/linux/hung_task.h +@@ -20,6 +20,10 @@ + * always zero. So we can use these bits to encode the specific blocking + * type. 
+ * ++ * Note that on architectures where this is not guaranteed, or for any ++ * unaligned lock, this tracking mechanism is silently skipped for that ++ * lock. ++ * + * Type encoding: + * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) + * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) +@@ -45,7 +49,7 @@ static inline void hung_task_set_blocker(void *lock, unsigned long type) + * If the lock pointer matches the BLOCKER_TYPE_MASK, return + * without writing anything. + */ +- if (WARN_ON_ONCE(lock_ptr & BLOCKER_TYPE_MASK)) ++ if (lock_ptr & BLOCKER_TYPE_MASK) + return; + + WRITE_ONCE(current->blocker, lock_ptr | type); +@@ -53,8 +57,6 @@ static inline void hung_task_set_blocker(void *lock, unsigned long type) + + static inline void hung_task_clear_blocker(void) + { +- WARN_ON_ONCE(!READ_ONCE(current->blocker)); +- + WRITE_ONCE(current->blocker, 0UL); + } + +-- +2.51.1 + diff --git a/queue-6.17/hwmon-pmbus-max34440-update-adpm12160-coeff-due-to-latest-fw.patch b/queue-6.17/hwmon-pmbus-max34440-update-adpm12160-coeff-due-to-latest-fw.patch new file mode 100644 index 0000000000..99354bd18e --- /dev/null +++ b/queue-6.17/hwmon-pmbus-max34440-update-adpm12160-coeff-due-to-latest-fw.patch @@ -0,0 +1,51 @@ +From 41de7440e6a00b8e70a068c50e3fba2f56302e8a Mon Sep 17 00:00:00 2001 +From: Alexis Czezar Torreno +Date: Wed, 1 Oct 2025 08:37:07 +0800 +Subject: hwmon: (pmbus/max34440) Update adpm12160 coeff due to latest FW + +From: Alexis Czezar Torreno + +commit 41de7440e6a00b8e70a068c50e3fba2f56302e8a upstream. + +adpm12160 is a dc-dc power module. The firmware was updated and the +coeeficients in the pmbus_driver_info needs to be updated. Since the +part has not yet released with older FW, this permanent change to +reflect the latest should be ok. + +Signed-off-by: Alexis Czezar Torreno +Link: https://lore.kernel.org/r/20251001-hwmon-next-v1-1-f8ca6a648203@analog.com +Fixes: 629cf8f6c23a ("hwmon: (pmbus/max34440) Add support for ADPM12160") +Cc: stable@vger.kernel.org # v6.16+ +Signed-off-by: Guenter Roeck +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hwmon/pmbus/max34440.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/hwmon/pmbus/max34440.c ++++ b/drivers/hwmon/pmbus/max34440.c +@@ -336,18 +336,18 @@ static struct pmbus_driver_info max34440 + .format[PSC_CURRENT_IN] = direct, + .format[PSC_CURRENT_OUT] = direct, + .format[PSC_TEMPERATURE] = direct, +- .m[PSC_VOLTAGE_IN] = 1, ++ .m[PSC_VOLTAGE_IN] = 125, + .b[PSC_VOLTAGE_IN] = 0, + .R[PSC_VOLTAGE_IN] = 0, +- .m[PSC_VOLTAGE_OUT] = 1, ++ .m[PSC_VOLTAGE_OUT] = 125, + .b[PSC_VOLTAGE_OUT] = 0, + .R[PSC_VOLTAGE_OUT] = 0, +- .m[PSC_CURRENT_IN] = 1, ++ .m[PSC_CURRENT_IN] = 250, + .b[PSC_CURRENT_IN] = 0, +- .R[PSC_CURRENT_IN] = 2, +- .m[PSC_CURRENT_OUT] = 1, ++ .R[PSC_CURRENT_IN] = -1, ++ .m[PSC_CURRENT_OUT] = 250, + .b[PSC_CURRENT_OUT] = 0, +- .R[PSC_CURRENT_OUT] = 2, ++ .R[PSC_CURRENT_OUT] = -1, + .m[PSC_TEMPERATURE] = 1, + .b[PSC_TEMPERATURE] = 0, + .R[PSC_TEMPERATURE] = 2, diff --git a/queue-6.17/io_uring-sqpoll-be-smarter-on-when-to-update-the-stime-usage.patch b/queue-6.17/io_uring-sqpoll-be-smarter-on-when-to-update-the-stime-usage.patch new file mode 100644 index 0000000000..65ff7b6925 --- /dev/null +++ b/queue-6.17/io_uring-sqpoll-be-smarter-on-when-to-update-the-stime-usage.patch @@ -0,0 +1,150 @@ +From a94e0657269c5b8e1a90b17aa2c048b3d276e16d Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Tue, 21 Oct 2025 11:44:39 -0600 +Subject: io_uring/sqpoll: be smarter on when to update the stime usage + +From: Jens Axboe 
+ +commit a94e0657269c5b8e1a90b17aa2c048b3d276e16d upstream. + +The current approach is a bit naive, and hence calls the time querying +way too often. Only start the "doing work" timer when there's actual +work to do, and then use that information to terminate (and account) the +work time once done. This greatly reduces the frequency of these calls, +when they cannot have changed anyway. + +Running a basic random reader that is setup to use SQPOLL, a profile +before this change shows these as the top cycle consumers: + ++ 32.60% iou-sqp-1074 [kernel.kallsyms] [k] thread_group_cputime_adjusted ++ 19.97% iou-sqp-1074 [kernel.kallsyms] [k] thread_group_cputime ++ 12.20% io_uring io_uring [.] submitter_uring_fn ++ 4.13% iou-sqp-1074 [kernel.kallsyms] [k] getrusage ++ 2.45% iou-sqp-1074 [kernel.kallsyms] [k] io_submit_sqes ++ 2.18% iou-sqp-1074 [kernel.kallsyms] [k] __pi_memset_generic ++ 2.09% iou-sqp-1074 [kernel.kallsyms] [k] cputime_adjust + +and after this change, top of profile looks as follows: + ++ 36.23% io_uring io_uring [.] submitter_uring_fn ++ 23.26% iou-sqp-819 [kernel.kallsyms] [k] io_sq_thread ++ 10.14% iou-sqp-819 [kernel.kallsyms] [k] io_sq_tw ++ 6.52% iou-sqp-819 [kernel.kallsyms] [k] tctx_task_work_run ++ 4.82% iou-sqp-819 [kernel.kallsyms] [k] nvme_submit_cmds.part.0 ++ 2.91% iou-sqp-819 [kernel.kallsyms] [k] io_submit_sqes +[...] + 0.02% iou-sqp-819 [kernel.kallsyms] [k] cputime_adjust + +where it's spending the cycles on things that actually matter. + +Reported-by: Fengnan Chang +Cc: stable@vger.kernel.org +Fixes: 3fcb9d17206e ("io_uring/sqpoll: statistics of the true utilization of sq threads") +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/sqpoll.c | 43 ++++++++++++++++++++++++++++++++----------- + 1 file changed, 32 insertions(+), 11 deletions(-) + +--- a/io_uring/sqpoll.c ++++ b/io_uring/sqpoll.c +@@ -170,6 +170,11 @@ static inline bool io_sqd_events_pending + return READ_ONCE(sqd->state); + } + ++struct io_sq_time { ++ bool started; ++ u64 usec; ++}; ++ + u64 io_sq_cpu_usec(struct task_struct *tsk) + { + u64 utime, stime; +@@ -179,12 +184,24 @@ u64 io_sq_cpu_usec(struct task_struct *t + return stime; + } + +-static void io_sq_update_worktime(struct io_sq_data *sqd, u64 usec) ++static void io_sq_update_worktime(struct io_sq_data *sqd, struct io_sq_time *ist) + { +- sqd->work_time += io_sq_cpu_usec(current) - usec; ++ if (!ist->started) ++ return; ++ ist->started = false; ++ sqd->work_time += io_sq_cpu_usec(current) - ist->usec; + } + +-static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) ++static void io_sq_start_worktime(struct io_sq_time *ist) ++{ ++ if (ist->started) ++ return; ++ ist->started = true; ++ ist->usec = io_sq_cpu_usec(current); ++} ++ ++static int __io_sq_thread(struct io_ring_ctx *ctx, struct io_sq_data *sqd, ++ bool cap_entries, struct io_sq_time *ist) + { + unsigned int to_submit; + int ret = 0; +@@ -197,6 +214,8 @@ static int __io_sq_thread(struct io_ring + if (to_submit || !wq_list_empty(&ctx->iopoll_list)) { + const struct cred *creds = NULL; + ++ io_sq_start_worktime(ist); ++ + if (ctx->sq_creds != current_cred()) + creds = override_creds(ctx->sq_creds); + +@@ -278,7 +297,6 @@ static int io_sq_thread(void *data) + unsigned long timeout = 0; + char buf[TASK_COMM_LEN] = {}; + DEFINE_WAIT(wait); +- u64 start; + + /* offload context creation failed, just exit */ + if (!current->io_uring) { +@@ -313,6 +331,7 @@ static int io_sq_thread(void *data) + mutex_lock(&sqd->lock); + while (1) { + bool 
cap_entries, sqt_spin = false; ++ struct io_sq_time ist = { }; + + if (io_sqd_events_pending(sqd) || signal_pending(current)) { + if (io_sqd_handle_event(sqd)) +@@ -321,9 +340,8 @@ static int io_sq_thread(void *data) + } + + cap_entries = !list_is_singular(&sqd->ctx_list); +- start = io_sq_cpu_usec(current); + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { +- int ret = __io_sq_thread(ctx, cap_entries); ++ int ret = __io_sq_thread(ctx, sqd, cap_entries, &ist); + + if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list))) + sqt_spin = true; +@@ -331,15 +349,18 @@ static int io_sq_thread(void *data) + if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE)) + sqt_spin = true; + +- list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) +- if (io_napi(ctx)) ++ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { ++ if (io_napi(ctx)) { ++ io_sq_start_worktime(&ist); + io_napi_sqpoll_busy_poll(ctx); ++ } ++ } ++ ++ io_sq_update_worktime(sqd, &ist); + + if (sqt_spin || !time_after(jiffies, timeout)) { +- if (sqt_spin) { +- io_sq_update_worktime(sqd, start); ++ if (sqt_spin) + timeout = jiffies + sqd->sq_thread_idle; +- } + if (unlikely(need_resched())) { + mutex_unlock(&sqd->lock); + cond_resched(); diff --git a/queue-6.17/io_uring-sqpoll-switch-away-from-getrusage-for-cpu-accounting.patch b/queue-6.17/io_uring-sqpoll-switch-away-from-getrusage-for-cpu-accounting.patch new file mode 100644 index 0000000000..eec7a0dad0 --- /dev/null +++ b/queue-6.17/io_uring-sqpoll-switch-away-from-getrusage-for-cpu-accounting.patch @@ -0,0 +1,140 @@ +From 8ac9b0d33e5c0a995338ee5f25fe1b6ff7d97f65 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Tue, 21 Oct 2025 07:16:08 -0600 +Subject: io_uring/sqpoll: switch away from getrusage() for CPU accounting + +From: Jens Axboe + +commit 8ac9b0d33e5c0a995338ee5f25fe1b6ff7d97f65 upstream. + +getrusage() does a lot more than what the SQPOLL accounting needs, the +latter only cares about (and uses) the stime. Rather than do a full +RUSAGE_SELF summation, just query the used stime instead. + +Cc: stable@vger.kernel.org +Fixes: 3fcb9d17206e ("io_uring/sqpoll: statistics of the true utilization of sq threads") +Reviewed-by: Gabriel Krisman Bertazi +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/fdinfo.c | 8 ++++---- + io_uring/sqpoll.c | 32 ++++++++++++++++++-------------- + io_uring/sqpoll.h | 1 + + 3 files changed, 23 insertions(+), 18 deletions(-) + +--- a/io_uring/fdinfo.c ++++ b/io_uring/fdinfo.c +@@ -59,7 +59,6 @@ static void __io_uring_show_fdinfo(struc + { + struct io_overflow_cqe *ocqe; + struct io_rings *r = ctx->rings; +- struct rusage sq_usage; + unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1; + unsigned int sq_head = READ_ONCE(r->sq.head); + unsigned int sq_tail = READ_ONCE(r->sq.tail); +@@ -150,14 +149,15 @@ static void __io_uring_show_fdinfo(struc + * thread termination. 
+ */ + if (tsk) { ++ u64 usec; ++ + get_task_struct(tsk); + rcu_read_unlock(); +- getrusage(tsk, RUSAGE_SELF, &sq_usage); ++ usec = io_sq_cpu_usec(tsk); + put_task_struct(tsk); + sq_pid = sq->task_pid; + sq_cpu = sq->sq_cpu; +- sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000 +- + sq_usage.ru_stime.tv_usec); ++ sq_total_time = usec; + sq_work_time = sq->work_time; + } else { + rcu_read_unlock(); +--- a/io_uring/sqpoll.c ++++ b/io_uring/sqpoll.c +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -169,6 +170,20 @@ static inline bool io_sqd_events_pending + return READ_ONCE(sqd->state); + } + ++u64 io_sq_cpu_usec(struct task_struct *tsk) ++{ ++ u64 utime, stime; ++ ++ task_cputime_adjusted(tsk, &utime, &stime); ++ do_div(stime, 1000); ++ return stime; ++} ++ ++static void io_sq_update_worktime(struct io_sq_data *sqd, u64 usec) ++{ ++ sqd->work_time += io_sq_cpu_usec(current) - usec; ++} ++ + static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) + { + unsigned int to_submit; +@@ -255,26 +270,15 @@ static bool io_sq_tw_pending(struct llis + return retry_list || !llist_empty(&tctx->task_list); + } + +-static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start) +-{ +- struct rusage end; +- +- getrusage(current, RUSAGE_SELF, &end); +- end.ru_stime.tv_sec -= start->ru_stime.tv_sec; +- end.ru_stime.tv_usec -= start->ru_stime.tv_usec; +- +- sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000; +-} +- + static int io_sq_thread(void *data) + { + struct llist_node *retry_list = NULL; + struct io_sq_data *sqd = data; + struct io_ring_ctx *ctx; +- struct rusage start; + unsigned long timeout = 0; + char buf[TASK_COMM_LEN] = {}; + DEFINE_WAIT(wait); ++ u64 start; + + /* offload context creation failed, just exit */ + if (!current->io_uring) { +@@ -317,7 +321,7 @@ static int io_sq_thread(void *data) + } + + cap_entries = !list_is_singular(&sqd->ctx_list); +- getrusage(current, RUSAGE_SELF, &start); ++ start = io_sq_cpu_usec(current); + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { + int ret = __io_sq_thread(ctx, cap_entries); + +@@ -333,7 +337,7 @@ static int io_sq_thread(void *data) + + if (sqt_spin || !time_after(jiffies, timeout)) { + if (sqt_spin) { +- io_sq_update_worktime(sqd, &start); ++ io_sq_update_worktime(sqd, start); + timeout = jiffies + sqd->sq_thread_idle; + } + if (unlikely(need_resched())) { +--- a/io_uring/sqpoll.h ++++ b/io_uring/sqpoll.h +@@ -29,6 +29,7 @@ void io_sq_thread_unpark(struct io_sq_da + void io_put_sq_data(struct io_sq_data *sqd); + void io_sqpoll_wait_sq(struct io_ring_ctx *ctx); + int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask); ++u64 io_sq_cpu_usec(struct task_struct *tsk); + + static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd) + { diff --git a/queue-6.17/mips-malta-fix-keyboard-resource-preventing-i8042-driver-from-registering.patch b/queue-6.17/mips-malta-fix-keyboard-resource-preventing-i8042-driver-from-registering.patch new file mode 100644 index 0000000000..1af7706c81 --- /dev/null +++ b/queue-6.17/mips-malta-fix-keyboard-resource-preventing-i8042-driver-from-registering.patch @@ -0,0 +1,65 @@ +From bf5570590a981d0659d0808d2d4bcda21b27a2a5 Mon Sep 17 00:00:00 2001 +From: "Maciej W. 
Rozycki" +Date: Tue, 21 Oct 2025 20:38:22 +0100 +Subject: MIPS: Malta: Fix keyboard resource preventing i8042 driver from registering +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Maciej W. Rozycki + +commit bf5570590a981d0659d0808d2d4bcda21b27a2a5 upstream. + +MIPS Malta platform code registers the PCI southbridge legacy port I/O +PS/2 keyboard range as a standard resource marked as busy. It prevents +the i8042 driver from registering as it fails to claim the resource in +a call to i8042_platform_init(). Consequently PS/2 keyboard and mouse +devices cannot be used with this platform. + +Fix the issue by removing the busy marker from the standard reservation, +making the driver register successfully: + + serio: i8042 KBD port at 0x60,0x64 irq 1 + serio: i8042 AUX port at 0x60,0x64 irq 12 + +and the resource show up as expected among the legacy devices: + + 00000000-00ffffff : MSC PCI I/O + 00000000-0000001f : dma1 + 00000020-00000021 : pic1 + 00000040-0000005f : timer + 00000060-0000006f : keyboard + 00000060-0000006f : i8042 + 00000070-00000077 : rtc0 + 00000080-0000008f : dma page reg + 000000a0-000000a1 : pic2 + 000000c0-000000df : dma2 + [...] + +If the i8042 driver has not been configured, then the standard resource +will remain there preventing any conflicting dynamic assignment of this +PCI port I/O address range. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Maciej W. Rozycki +Signed-off-by: Bjorn Helgaas +Reviewed-by: Ilpo Järvinen +Acked-by: Thomas Bogendoerfer +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/alpine.DEB.2.21.2510211919240.8377@angie.orcam.me.uk +Signed-off-by: Greg Kroah-Hartman +--- + arch/mips/mti-malta/malta-setup.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/mips/mti-malta/malta-setup.c ++++ b/arch/mips/mti-malta/malta-setup.c +@@ -47,7 +47,7 @@ static struct resource standard_io_resou + .name = "keyboard", + .start = 0x60, + .end = 0x6f, +- .flags = IORESOURCE_IO | IORESOURCE_BUSY ++ .flags = IORESOURCE_IO + }, + { + .name = "dma page reg", diff --git a/queue-6.17/mm-damon-core-fix-list_add_tail-call-on-damon_call.patch b/queue-6.17/mm-damon-core-fix-list_add_tail-call-on-damon_call.patch new file mode 100644 index 0000000000..0e6e5199bf --- /dev/null +++ b/queue-6.17/mm-damon-core-fix-list_add_tail-call-on-damon_call.patch @@ -0,0 +1,61 @@ +From c3fa5b1bfd8380d935fa961f2ac166bdf000f418 Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Tue, 14 Oct 2025 13:59:36 -0700 +Subject: mm/damon/core: fix list_add_tail() call on damon_call() + +From: SeongJae Park + +commit c3fa5b1bfd8380d935fa961f2ac166bdf000f418 upstream. + +Each damon_ctx maintains callback requests using a linked list +(damon_ctx->call_controls). When a new callback request is received via +damon_call(), the new request should be added to the list. However, the +function is making a mistake at list_add_tail() invocation: putting the +new item to add and the list head to add it before, in the opposite order. +Because of the linked list manipulation implementation, the new request +can still be reached from the context's list head. But the list items +that were added before the new request are dropped from the list. + +As a result, the callbacks are unexpectedly not invocated. Worse yet, if +the dropped callback requests were dynamically allocated, the memory is +leaked. 
Actually DAMON sysfs interface is using a dynamically allocated +repeat-mode callback request for automatic essential stats update. And +because the online DAMON parameters commit is using a non-repeat-mode +callback request, the issue can easily be reproduced, like below. + + # damo start --damos_action stat --refresh_stat 1s + # damo tune --damos_action stat --refresh_stat 1s + +The first command dynamically allocates the repeat-mode callback request +for automatic essential stat update. Users can see the essential stats +are automatically updated for every second, using the sysfs interface. + +The second command calls damon_commit() with a new callback request that +was made for the commit. As a result, the previously added repeat-mode +callback request is dropped from the list. The automatic stats refresh +stops working, and the memory for the repeat-mode callback request is +leaked. It can be confirmed using kmemleak. + +Fix the mistake on the list_add_tail() call. + +Link: https://lkml.kernel.org/r/20251014205939.1206-1-sj@kernel.org +Fixes: 004ded6bee11 ("mm/damon: accept parallel damon_call() requests") +Signed-off-by: SeongJae Park +Cc: [6.17+] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/damon/core.c ++++ b/mm/damon/core.c +@@ -1422,7 +1422,7 @@ int damon_call(struct damon_ctx *ctx, st + INIT_LIST_HEAD(&control->list); + + mutex_lock(&ctx->call_controls_lock); +- list_add_tail(&ctx->call_controls, &control->list); ++ list_add_tail(&control->list, &ctx->call_controls); + mutex_unlock(&ctx->call_controls_lock); + if (!damon_is_running(ctx)) + return -EINVAL; diff --git a/queue-6.17/mm-damon-core-fix-potential-memory-leak-by-cleaning-ops_filter-in-damon_destroy_scheme.patch b/queue-6.17/mm-damon-core-fix-potential-memory-leak-by-cleaning-ops_filter-in-damon_destroy_scheme.patch new file mode 100644 index 0000000000..eff648f57a --- /dev/null +++ b/queue-6.17/mm-damon-core-fix-potential-memory-leak-by-cleaning-ops_filter-in-damon_destroy_scheme.patch @@ -0,0 +1,40 @@ +From 7071537159be845a5c4ed5fb7d3db25aa4bd04a3 Mon Sep 17 00:00:00 2001 +From: Enze Li +Date: Tue, 14 Oct 2025 16:42:25 +0800 +Subject: mm/damon/core: fix potential memory leak by cleaning ops_filter in damon_destroy_scheme + +From: Enze Li + +commit 7071537159be845a5c4ed5fb7d3db25aa4bd04a3 upstream. + +Currently, damon_destroy_scheme() only cleans up the filter list but +leaves ops_filter untouched, which could lead to memory leaks when a +scheme is destroyed. + +This patch ensures both filter and ops_filter are properly freed in +damon_destroy_scheme(), preventing potential memory leaks. 
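+
+For illustration, the teardown now walks both lists with the usual
+safe-iteration pattern. A minimal sketch using the generic list helpers
+(the actual code uses the damos_for_each_filter_safe() and
+damos_for_each_ops_filter_safe() accessors, as in the hunk below):
+
+	struct damos_filter *f, *next;
+
+	/* core filters (damos->filters) */
+	list_for_each_entry_safe(f, next, &s->filters, list)
+		damos_destroy_filter(f);
+
+	/* ops-layer filters (damos->ops_filters), previously leaked */
+	list_for_each_entry_safe(f, next, &s->ops_filters, list)
+		damos_destroy_filter(f);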
+ +Link: https://lkml.kernel.org/r/20251014084225.313313-1-lienze@kylinos.cn +Fixes: ab82e57981d0 ("mm/damon/core: introduce damos->ops_filters") +Signed-off-by: Enze Li +Reviewed-by: SeongJae Park +Tested-by: SeongJae Park +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/core.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/mm/damon/core.c ++++ b/mm/damon/core.c +@@ -451,6 +451,9 @@ void damon_destroy_scheme(struct damos * + damos_for_each_filter_safe(f, next, s) + damos_destroy_filter(f); + ++ damos_for_each_ops_filter_safe(f, next, s) ++ damos_destroy_filter(f); ++ + kfree(s->migrate_dests.node_id_arr); + kfree(s->migrate_dests.weight_arr); + damon_del_scheme(s); diff --git a/queue-6.17/mm-damon-core-use-damos_commit_quota_goal-for-new-goal-commit.patch b/queue-6.17/mm-damon-core-use-damos_commit_quota_goal-for-new-goal-commit.patch new file mode 100644 index 0000000000..02e0779799 --- /dev/null +++ b/queue-6.17/mm-damon-core-use-damos_commit_quota_goal-for-new-goal-commit.patch @@ -0,0 +1,48 @@ +From 7eca961dd7188f20fdf8ce9ed5018280f79b2438 Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Mon, 13 Oct 2025 17:18:44 -0700 +Subject: mm/damon/core: use damos_commit_quota_goal() for new goal commit + +From: SeongJae Park + +commit 7eca961dd7188f20fdf8ce9ed5018280f79b2438 upstream. + +When damos_commit_quota_goals() is called for adding new DAMOS quota goals +of DAMOS_QUOTA_USER_INPUT metric, current_value fields of the new goals +should be also set as requested. + +However, damos_commit_quota_goals() is not updating the field for the +case, since it is setting only metrics and target values using +damos_new_quota_goal(), and metric-optional union fields using +damos_commit_quota_goal_union(). As a result, users could see the first +current_value parameter that committed online with a new quota goal is +ignored. Users are assumed to commit the current_value for +DAMOS_QUOTA_USER_INPUT quota goals, since it is being used as a feedback. +Hence the real impact would be subtle. That said, this is obviously not +intended behavior. + +Fix the issue by using damos_commit_quota_goal() which sets all quota goal +parameters, instead of damos_commit_quota_goal_union(), which sets only +the union fields. + +Link: https://lkml.kernel.org/r/20251014001846.279282-1-sj@kernel.org +Fixes: 1aef9df0ee90 ("mm/damon/core: commit damos_quota_goal->nid") +Signed-off-by: SeongJae Park +Cc: [6.16+] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/damon/core.c ++++ b/mm/damon/core.c +@@ -811,7 +811,7 @@ int damos_commit_quota_goals(struct damo + src_goal->metric, src_goal->target_value); + if (!new_goal) + return -ENOMEM; +- damos_commit_quota_goal_union(new_goal, src_goal); ++ damos_commit_quota_goal(new_goal, src_goal); + damos_add_quota_goal(dst, new_goal); + } + return 0; diff --git a/queue-6.17/mm-damon-sysfs-catch-commit-test-ctx-alloc-failure.patch b/queue-6.17/mm-damon-sysfs-catch-commit-test-ctx-alloc-failure.patch new file mode 100644 index 0000000000..b24040ddc2 --- /dev/null +++ b/queue-6.17/mm-damon-sysfs-catch-commit-test-ctx-alloc-failure.patch @@ -0,0 +1,46 @@ +From f0c5118ebb0eb7e4fd6f0d2ace3315ca141b317f Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Fri, 3 Oct 2025 13:14:54 -0700 +Subject: mm/damon/sysfs: catch commit test ctx alloc failure + +From: SeongJae Park + +commit f0c5118ebb0eb7e4fd6f0d2ace3315ca141b317f upstream. 
+ +Patch series "mm/damon/sysfs: fix commit test damon_ctx [de]allocation". + +DAMON sysfs interface dynamically allocates and uses a damon_ctx object +for testing if given inputs for online DAMON parameters update is valid. +The object is being used without an allocation failure check, and leaked +when the test succeeds. Fix the two bugs. + + +This patch (of 2): + +The damon_ctx for testing online DAMON parameters commit inputs is used +without its allocation failure check. This could result in an invalid +memory access. Fix it by directly returning an error when the allocation +failed. + +Link: https://lkml.kernel.org/r/20251003201455.41448-1-sj@kernel.org +Link: https://lkml.kernel.org/r/20251003201455.41448-2-sj@kernel.org +Fixes: 4c9ea539ad59 ("mm/damon/sysfs: validate user inputs from damon_sysfs_commit_input()") +Signed-off-by: SeongJae Park +Cc: [6.15+] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/sysfs.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/mm/damon/sysfs.c ++++ b/mm/damon/sysfs.c +@@ -1435,6 +1435,8 @@ static int damon_sysfs_commit_input(void + if (IS_ERR(param_ctx)) + return PTR_ERR(param_ctx); + test_ctx = damon_new_ctx(); ++ if (!test_ctx) ++ return -ENOMEM; + err = damon_commit_ctx(test_ctx, param_ctx); + if (err) { + damon_destroy_ctx(test_ctx); diff --git a/queue-6.17/mm-damon-sysfs-dealloc-commit-test-ctx-always.patch b/queue-6.17/mm-damon-sysfs-dealloc-commit-test-ctx-always.patch new file mode 100644 index 0000000000..77e599152b --- /dev/null +++ b/queue-6.17/mm-damon-sysfs-dealloc-commit-test-ctx-always.patch @@ -0,0 +1,41 @@ +From 139e7a572af0b45f558b5e502121a768dc328ba8 Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Fri, 3 Oct 2025 13:14:55 -0700 +Subject: mm/damon/sysfs: dealloc commit test ctx always + +From: SeongJae Park + +commit 139e7a572af0b45f558b5e502121a768dc328ba8 upstream. + +The damon_ctx for testing online DAMON parameters commit inputs is +deallocated only when the test fails. This means memory is leaked for +every successful online DAMON parameters commit. Fix the leak by always +deallocating it. + +Link: https://lkml.kernel.org/r/20251003201455.41448-3-sj@kernel.org +Fixes: 4c9ea539ad59 ("mm/damon/sysfs: validate user inputs from damon_sysfs_commit_input()") +Signed-off-by: SeongJae Park +Cc: [6.15+] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/sysfs.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/mm/damon/sysfs.c ++++ b/mm/damon/sysfs.c +@@ -1438,12 +1438,11 @@ static int damon_sysfs_commit_input(void + if (!test_ctx) + return -ENOMEM; + err = damon_commit_ctx(test_ctx, param_ctx); +- if (err) { +- damon_destroy_ctx(test_ctx); ++ if (err) + goto out; +- } + err = damon_commit_ctx(kdamond->damon_ctx, param_ctx); + out: ++ damon_destroy_ctx(test_ctx); + damon_destroy_ctx(param_ctx); + return err; + } diff --git a/queue-6.17/mm-don-t-spin-in-add_stack_record-when-gfp-flags-don-t-allow.patch b/queue-6.17/mm-don-t-spin-in-add_stack_record-when-gfp-flags-don-t-allow.patch new file mode 100644 index 0000000000..2a8fff2ff1 --- /dev/null +++ b/queue-6.17/mm-don-t-spin-in-add_stack_record-when-gfp-flags-don-t-allow.patch @@ -0,0 +1,58 @@ +From c83aab85e18103a6dc066b4939e2c92a02bb1b05 Mon Sep 17 00:00:00 2001 +From: Alexei Starovoitov +Date: Thu, 9 Oct 2025 17:15:13 -0700 +Subject: mm: don't spin in add_stack_record when gfp flags don't allow + +From: Alexei Starovoitov + +commit c83aab85e18103a6dc066b4939e2c92a02bb1b05 upstream. 
+ +syzbot was able to find the following path: + add_stack_record_to_list mm/page_owner.c:182 [inline] + inc_stack_record_count mm/page_owner.c:214 [inline] + __set_page_owner+0x2c3/0x4a0 mm/page_owner.c:333 + set_page_owner include/linux/page_owner.h:32 [inline] + post_alloc_hook+0x240/0x2a0 mm/page_alloc.c:1851 + prep_new_page mm/page_alloc.c:1859 [inline] + get_page_from_freelist+0x21e4/0x22c0 mm/page_alloc.c:3858 + alloc_pages_nolock_noprof+0x94/0x120 mm/page_alloc.c:7554 + +Don't spin in add_stack_record_to_list() when it is called +from *_nolock() context. + +Link: https://lkml.kernel.org/r/CAADnVQK_8bNYEA7TJYgwTYR57=TTFagsvRxp62pFzS_z129eTg@mail.gmail.com +Fixes: 97769a53f117 ("mm, bpf: Introduce try_alloc_pages() for opportunistic page allocation") +Signed-off-by: Alexei Starovoitov +Reported-by: syzbot+8259e1d0e3ae8ed0c490@syzkaller.appspotmail.com +Reported-by: syzbot+665739f456b28f32b23d@syzkaller.appspotmail.com +Acked-by: Vlastimil Babka +Reviewed-by: Oscar Salvador +Cc: Brendan Jackman +Cc: Johannes Weiner +Cc: Michal Hocko +Cc: Suren Baghdasaryan +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_owner.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/mm/page_owner.c b/mm/page_owner.c +index c3ca21132c2c..589ec37c94aa 100644 +--- a/mm/page_owner.c ++++ b/mm/page_owner.c +@@ -168,6 +168,9 @@ static void add_stack_record_to_list(struct stack_record *stack_record, + unsigned long flags; + struct stack *stack; + ++ if (!gfpflags_allow_spinning(gfp_mask)) ++ return; ++ + set_current_in_page_owner(); + stack = kmalloc(sizeof(*stack), gfp_nested_mask(gfp_mask)); + if (!stack) { +-- +2.51.1 + diff --git a/queue-6.17/mm-mremap-correctly-account-old-mapping-after-mremap_dontunmap-remap.patch b/queue-6.17/mm-mremap-correctly-account-old-mapping-after-mremap_dontunmap-remap.patch new file mode 100644 index 0000000000..a8a79e2a63 --- /dev/null +++ b/queue-6.17/mm-mremap-correctly-account-old-mapping-after-mremap_dontunmap-remap.patch @@ -0,0 +1,73 @@ +From 0e59f47c15cec4cd88c51c5cda749607b719c82b Mon Sep 17 00:00:00 2001 +From: Lorenzo Stoakes +Date: Mon, 13 Oct 2025 17:58:36 +0100 +Subject: mm/mremap: correctly account old mapping after MREMAP_DONTUNMAP remap + +From: Lorenzo Stoakes + +commit 0e59f47c15cec4cd88c51c5cda749607b719c82b upstream. + +Commit b714ccb02a76 ("mm/mremap: complete refactor of move_vma()") +mistakenly introduced a new behaviour - clearing the VM_ACCOUNT flag of +the old mapping when a mapping is mremap()'d with the MREMAP_DONTUNMAP +flag set. + +While we always clear the VM_LOCKED and VM_LOCKONFAULT flags for the old +mapping (the page tables have been moved, so there is no data that could +possibly be locked in memory), there is no reason to touch any other VMA +flags. + +This is because after the move the old mapping is in a state as if it were +freshly mapped. This implies that the attributes of the mapping ought to +remain the same, including whether or not the mapping is accounted. 
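+
+For context, a userspace sketch of the operation whose accounting this
+fixes (illustrative only; error handling elided, 'len' assumed to be a
+page-aligned size). After the call, 'old' remains mapped but empty, and
+now stays accounted exactly as a fresh anonymous mapping would:
+
+	#define _GNU_SOURCE
+	#include <sys/mman.h>
+
+	/* inside some function */
+	void *old = mmap(NULL, len, PROT_READ | PROT_WRITE,
+			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	/* move the pages; MREMAP_DONTUNMAP keeps the old range mapped */
+	void *dst = mremap(old, len, len,
+			   MREMAP_MAYMOVE | MREMAP_DONTUNMAP);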
+ +Link: https://lkml.kernel.org/r/20251013165836.273113-1-lorenzo.stoakes@oracle.com +Signed-off-by: Lorenzo Stoakes +Fixes: b714ccb02a76 ("mm/mremap: complete refactor of move_vma()") +Reviewed-by: Pedro Falcato +Cc: Jann Horn +Cc: Liam Howlett +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/mremap.c | 15 ++++++--------- + 1 file changed, 6 insertions(+), 9 deletions(-) + +diff --git a/mm/mremap.c b/mm/mremap.c +index 35de0a7b910e..bd7314898ec5 100644 +--- a/mm/mremap.c ++++ b/mm/mremap.c +@@ -1237,10 +1237,10 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm, + } + + /* +- * Perform final tasks for MADV_DONTUNMAP operation, clearing mlock() and +- * account flags on remaining VMA by convention (it cannot be mlock()'d any +- * longer, as pages in range are no longer mapped), and removing anon_vma_chain +- * links from it (if the entire VMA was copied over). ++ * Perform final tasks for MADV_DONTUNMAP operation, clearing mlock() flag on ++ * remaining VMA by convention (it cannot be mlock()'d any longer, as pages in ++ * range are no longer mapped), and removing anon_vma_chain links from it if the ++ * entire VMA was copied over. + */ + static void dontunmap_complete(struct vma_remap_struct *vrm, + struct vm_area_struct *new_vma) +@@ -1250,11 +1250,8 @@ static void dontunmap_complete(struct vma_remap_struct *vrm, + unsigned long old_start = vrm->vma->vm_start; + unsigned long old_end = vrm->vma->vm_end; + +- /* +- * We always clear VM_LOCKED[ONFAULT] | VM_ACCOUNT on the old +- * vma. +- */ +- vm_flags_clear(vrm->vma, VM_LOCKED_MASK | VM_ACCOUNT); ++ /* We always clear VM_LOCKED[ONFAULT] on the old VMA. */ ++ vm_flags_clear(vrm->vma, VM_LOCKED_MASK); + + /* + * anon_vma links of the old vma is no longer needed after its page +-- +2.51.1 + diff --git a/queue-6.17/mm-prevent-poison-consumption-when-splitting-thp.patch b/queue-6.17/mm-prevent-poison-consumption-when-splitting-thp.patch new file mode 100644 index 0000000000..9138b9f65f --- /dev/null +++ b/queue-6.17/mm-prevent-poison-consumption-when-splitting-thp.patch @@ -0,0 +1,125 @@ +From 841a8bfcbad94bb1ba60f59ce34f75259074ae0d Mon Sep 17 00:00:00 2001 +From: Qiuxu Zhuo +Date: Sat, 11 Oct 2025 15:55:19 +0800 +Subject: mm: prevent poison consumption when splitting THP + +From: Qiuxu Zhuo + +commit 841a8bfcbad94bb1ba60f59ce34f75259074ae0d upstream. + +When performing memory error injection on a THP (Transparent Huge Page) +mapped to userspace on an x86 server, the kernel panics with the following +trace. The expected behavior is to terminate the affected process instead +of panicking the kernel, as the x86 Machine Check code can recover from an +in-userspace #MC. + + mce: [Hardware Error]: CPU 0: Machine Check Exception: f Bank 3: bd80000000070134 + mce: [Hardware Error]: RIP 10: {memchr_inv+0x4c/0xf0} + mce: [Hardware Error]: TSC afff7bbff88a ADDR 1d301b000 MISC 80 PPIN 1e741e77539027db + mce: [Hardware Error]: PROCESSOR 0:d06d0 TIME 1758093249 SOCKET 0 APIC 0 microcode 80000320 + mce: [Hardware Error]: Run the above through 'mcelog --ascii' + mce: [Hardware Error]: Machine check: Data load in unrecoverable area of kernel + Kernel panic - not syncing: Fatal local machine check + +The root cause of this panic is that handling a memory failure triggered +by an in-userspace #MC necessitates splitting the THP. The splitting +process employs a mechanism, implemented in +try_to_map_unused_to_zeropage(), which reads the pages in the THP to +identify zero-filled pages. 
However, reading the pages in the THP results +in a second in-kernel #MC, occurring before the initial memory_failure() +completes, ultimately leading to a kernel panic. See the kernel panic +call trace on the two #MCs. + + First Machine Check occurs // [1] + memory_failure() // [2] + try_to_split_thp_page() + split_huge_page() + split_huge_page_to_list_to_order() + __folio_split() // [3] + remap_page() + remove_migration_ptes() + remove_migration_pte() + try_to_map_unused_to_zeropage() // [4] + memchr_inv() // [5] + Second Machine Check occurs // [6] + Kernel panic + +[1] Triggered by accessing a hardware-poisoned THP in userspace, which is + typically recoverable by terminating the affected process. + +[2] Call folio_set_has_hwpoisoned() before try_to_split_thp_page(). + +[3] Pass the RMP_USE_SHARED_ZEROPAGE remap flag to remap_page(). + +[4] Try to map the unused THP to zeropage. + +[5] Re-access pages in the hw-poisoned THP in the kernel. + +[6] Triggered in-kernel, leading to a panic kernel. + +In Step[2], memory_failure() sets the poisoned flag on the page in the THP +by TestSetPageHWPoison() before calling try_to_split_thp_page(). + +As suggested by David Hildenbrand, fix this panic by not accessing to the +poisoned page in the THP during zeropage identification, while continuing +to scan unaffected pages in the THP for possible zeropage mapping. This +prevents a second in-kernel #MC that would cause kernel panic in Step[4]. + +Thanks to Andrew Zaborowski for his initial work on fixing this issue. + +Link: https://lkml.kernel.org/r/20251015064926.1887643-1-qiuxu.zhuo@intel.com +Link: https://lkml.kernel.org/r/20251011075520.320862-1-qiuxu.zhuo@intel.com +Fixes: b1f202060afe ("mm: remap unused subpages to shared zeropage when splitting isolated thp") +Signed-off-by: Qiuxu Zhuo +Reported-by: Farrah Chen +Suggested-by: David Hildenbrand +Acked-by: David Hildenbrand +Tested-by: Farrah Chen +Tested-by: Qiuxu Zhuo +Acked-by: Lance Yang +Reviewed-by: Wei Yang +Acked-by: Zi Yan +Reviewed-by: Miaohe Lin +Cc: Barry Song +Cc: Dev Jain +Cc: Jiaqi Yan +Cc: Liam Howlett +Cc: Lorenzo Stoakes +Cc: "Luck, Tony" +Cc: Mariano Pache +Cc: Miaohe Lin +Cc: Naoya Horiguchi +Cc: Ryan Roberts +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/huge_memory.c | 3 +++ + mm/migrate.c | 3 ++- + 2 files changed, 5 insertions(+), 1 deletion(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -4120,6 +4120,9 @@ static bool thp_underused(struct folio * + if (khugepaged_max_ptes_none == HPAGE_PMD_NR - 1) + return false; + ++ if (folio_contain_hwpoisoned_page(folio)) ++ return false; ++ + for (i = 0; i < folio_nr_pages(folio); i++) { + if (pages_identical(folio_page(folio, i), ZERO_PAGE(0))) { + if (++num_zero_pages > khugepaged_max_ptes_none) +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -302,8 +302,9 @@ static bool try_to_map_unused_to_zeropag + struct page *page = folio_page(folio, idx); + pte_t newpte; + +- if (PageCompound(page)) ++ if (PageCompound(page) || PageHWPoison(page)) + return false; ++ + VM_BUG_ON_PAGE(!PageAnon(page), page); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(pte_present(old_pte), page); diff --git a/queue-6.17/mptcp-pm-in-kernel-c-flag-handle-late-add_addr.patch b/queue-6.17/mptcp-pm-in-kernel-c-flag-handle-late-add_addr.patch new file mode 100644 index 0000000000..acb80705e2 --- /dev/null +++ b/queue-6.17/mptcp-pm-in-kernel-c-flag-handle-late-add_addr.patch @@ -0,0 +1,81 @@ +From e84cb860ac3ce67ec6ecc364433fd5b412c448bc Mon Sep 17 00:00:00 2001 
+From: "Matthieu Baerts (NGI0)" +Date: Mon, 20 Oct 2025 22:53:26 +0200 +Subject: mptcp: pm: in-kernel: C-flag: handle late ADD_ADDR + +From: Matthieu Baerts (NGI0) + +commit e84cb860ac3ce67ec6ecc364433fd5b412c448bc upstream. + +The special C-flag case expects the ADD_ADDR to be received when +switching to 'fully-established'. But for various reasons, the ADD_ADDR +could be sent after the "4th ACK", and the special case doesn't work. + +On NIPA, the new test validating this special case for the C-flag failed +a few times, e.g. + + 102 default limits, server deny join id 0 + syn rx [FAIL] got 0 JOIN[s] syn rx expected 2 + + Server ns stats + (...) + MPTcpExtAddAddrTx 1 + MPTcpExtEchoAdd 1 + + Client ns stats + (...) + MPTcpExtAddAddr 1 + MPTcpExtEchoAddTx 1 + + synack rx [FAIL] got 0 JOIN[s] synack rx expected 2 + ack rx [FAIL] got 0 JOIN[s] ack rx expected 2 + join Rx [FAIL] see above + syn tx [FAIL] got 0 JOIN[s] syn tx expected 2 + join Tx [FAIL] see above + +I had a suspicion about what the issue could be: the ADD_ADDR might have +been received after the switch to the 'fully-established' state. The +issue was not easy to reproduce. The packet capture shown that the +ADD_ADDR can indeed be sent with a delay, and the client would not try +to establish subflows to it as expected. + +A simple fix is not to mark the endpoints as 'used' in the C-flag case, +when looking at creating subflows to the remote initial IP address and +port. In this case, there is no need to try. + +Note: newly added fullmesh endpoints will still continue to be used as +expected, thanks to the conditions behind mptcp_pm_add_addr_c_flag_case. + +Fixes: 4b1ff850e0c1 ("mptcp: pm: in-kernel: usable client side with C-flag") +Cc: stable@vger.kernel.org +Reviewed-by: Geliang Tang +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20251020-net-mptcp-c-flag-late-add-addr-v1-1-8207030cb0e8@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm_kernel.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/mptcp/pm_kernel.c ++++ b/net/mptcp/pm_kernel.c +@@ -333,6 +333,10 @@ static void mptcp_pm_create_subflow_or_s + } + + subflow: ++ /* No need to try establishing subflows to remote id0 if not allowed */ ++ if (mptcp_pm_add_addr_c_flag_case(msk)) ++ goto exit; ++ + /* check if should create a new subflow */ + while (msk->pm.local_addr_used < local_addr_max && + msk->pm.subflows < subflows_max) { +@@ -364,6 +368,8 @@ subflow: + __mptcp_subflow_connect(sk, &local, &addrs[i]); + spin_lock_bh(&msk->pm.lock); + } ++ ++exit: + mptcp_pm_nl_check_work_pending(msk); + } + diff --git a/queue-6.17/net-bonding-fix-possible-peer-notify-event-loss-or-dup-issue.patch b/queue-6.17/net-bonding-fix-possible-peer-notify-event-loss-or-dup-issue.patch new file mode 100644 index 0000000000..716318a7c6 --- /dev/null +++ b/queue-6.17/net-bonding-fix-possible-peer-notify-event-loss-or-dup-issue.patch @@ -0,0 +1,113 @@ +From 10843e1492e474c02b91314963161731fa92af91 Mon Sep 17 00:00:00 2001 +From: Tonghao Zhang +Date: Tue, 21 Oct 2025 13:09:33 +0800 +Subject: net: bonding: fix possible peer notify event loss or dup issue + +From: Tonghao Zhang + +commit 10843e1492e474c02b91314963161731fa92af91 upstream. + +If the send_peer_notif counter and the peer event notify are not synchronized. +It may cause problems such as the loss or dup of peer notify event. 
+ +Before this patch: +- If should_notify_peers is true and the lock for send_peer_notif-- fails, peer + event may be sent again in next mii_monitor loop, because should_notify_peers + is still true. +- If should_notify_peers is true and the lock for send_peer_notif-- succeeded, + but the lock for peer event fails, the peer event will be lost. + +This patch locks the RTNL for send_peer_notif, events, and commit simultaneously. + +Fixes: 07a4ddec3ce9 ("bonding: add an option to specify a delay between peer notifications") +Cc: Jay Vosburgh +Cc: Andrew Lunn +Cc: Eric Dumazet +Cc: Jakub Kicinski +Cc: Paolo Abeni +Cc: Hangbin Liu +Cc: Nikolay Aleksandrov +Cc: Vincent Bernat +Cc: +Signed-off-by: Tonghao Zhang +Acked-by: Jay Vosburgh +Link: https://patch.msgid.link/20251021050933.46412-1-tonghao@bamaicloud.com +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 40 ++++++++++++++++++---------------------- + 1 file changed, 18 insertions(+), 22 deletions(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -2969,7 +2969,7 @@ static void bond_mii_monitor(struct work + { + struct bonding *bond = container_of(work, struct bonding, + mii_work.work); +- bool should_notify_peers = false; ++ bool should_notify_peers; + bool commit; + unsigned long delay; + struct slave *slave; +@@ -2981,30 +2981,33 @@ static void bond_mii_monitor(struct work + goto re_arm; + + rcu_read_lock(); ++ + should_notify_peers = bond_should_notify_peers(bond); + commit = !!bond_miimon_inspect(bond); +- if (bond->send_peer_notif) { +- rcu_read_unlock(); +- if (rtnl_trylock()) { +- bond->send_peer_notif--; +- rtnl_unlock(); +- } +- } else { +- rcu_read_unlock(); +- } + +- if (commit) { ++ rcu_read_unlock(); ++ ++ if (commit || bond->send_peer_notif) { + /* Race avoidance with bond_close cancel of workqueue */ + if (!rtnl_trylock()) { + delay = 1; +- should_notify_peers = false; + goto re_arm; + } + +- bond_for_each_slave(bond, slave, iter) { +- bond_commit_link_state(slave, BOND_SLAVE_NOTIFY_LATER); ++ if (commit) { ++ bond_for_each_slave(bond, slave, iter) { ++ bond_commit_link_state(slave, ++ BOND_SLAVE_NOTIFY_LATER); ++ } ++ bond_miimon_commit(bond); ++ } ++ ++ if (bond->send_peer_notif) { ++ bond->send_peer_notif--; ++ if (should_notify_peers) ++ call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ++ bond->dev); + } +- bond_miimon_commit(bond); + + rtnl_unlock(); /* might sleep, hold no other locks */ + } +@@ -3012,13 +3015,6 @@ static void bond_mii_monitor(struct work + re_arm: + if (bond->params.miimon) + queue_delayed_work(bond->wq, &bond->mii_work, delay); +- +- if (should_notify_peers) { +- if (!rtnl_trylock()) +- return; +- call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); +- rtnl_unlock(); +- } + } + + static int bond_upper_dev_walk(struct net_device *upper, diff --git a/queue-6.17/net-bonding-update-the-slave-array-for-broadcast-mode.patch b/queue-6.17/net-bonding-update-the-slave-array-for-broadcast-mode.patch new file mode 100644 index 0000000000..1ed4940b75 --- /dev/null +++ b/queue-6.17/net-bonding-update-the-slave-array-for-broadcast-mode.patch @@ -0,0 +1,56 @@ +From e0caeb24f538c3c9c94f471882ceeb43d9dc2739 Mon Sep 17 00:00:00 2001 +From: Tonghao Zhang +Date: Thu, 16 Oct 2025 20:51:36 +0800 +Subject: net: bonding: update the slave array for broadcast mode + +From: Tonghao Zhang + +commit e0caeb24f538c3c9c94f471882ceeb43d9dc2739 upstream. 
+ +This patch fixes ce7a381697cb ("net: bonding: add broadcast_neighbor option for 802.3ad"). +Before this commit, on the broadcast mode, all devices were traversed using the +bond_for_each_slave_rcu. This patch supports traversing devices by using all_slaves. +Therefore, we need to update the slave array when enslave or release slave. + +Fixes: ce7a381697cb ("net: bonding: add broadcast_neighbor option for 802.3ad") +Cc: Simon Horman +Cc: Jonathan Corbet +Cc: Andrew Lunn +Cc: +Reported-by: Jiri Slaby +Tested-by: Jiri Slaby +Link: https://lore.kernel.org/all/a97e6e1e-81bc-4a79-8352-9e4794b0d2ca@kernel.org/ +Signed-off-by: Tonghao Zhang +Reviewed-by: Hangbin Liu +Reviewed-by: Nikolay Aleksandrov +Acked-by: Jay Vosburgh +Link: https://patch.msgid.link/20251016125136.16568-1-tonghao@bamaicloud.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -2385,7 +2385,9 @@ skip_mac_set: + unblock_netpoll_tx(); + } + +- if (bond_mode_can_use_xmit_hash(bond)) ++ /* broadcast mode uses the all_slaves to loop through slaves. */ ++ if (bond_mode_can_use_xmit_hash(bond) || ++ BOND_MODE(bond) == BOND_MODE_BROADCAST) + bond_update_slave_arr(bond, NULL); + + if (!slave_dev->netdev_ops->ndo_bpf || +@@ -2561,7 +2563,8 @@ static int __bond_release_one(struct net + + bond_upper_dev_unlink(bond, slave); + +- if (bond_mode_can_use_xmit_hash(bond)) ++ if (bond_mode_can_use_xmit_hash(bond) || ++ BOND_MODE(bond) == BOND_MODE_BROADCAST) + bond_update_slave_arr(bond, slave); + + slave_info(bond_dev, slave_dev, "Releasing %s interface\n", diff --git a/queue-6.17/net-ravb-enforce-descriptor-type-ordering.patch b/queue-6.17/net-ravb-enforce-descriptor-type-ordering.patch new file mode 100644 index 0000000000..f3c90aaddf --- /dev/null +++ b/queue-6.17/net-ravb-enforce-descriptor-type-ordering.patch @@ -0,0 +1,73 @@ +From 5370c31e84b0e0999c7b5ff949f4e104def35584 Mon Sep 17 00:00:00 2001 +From: Lad Prabhakar +Date: Fri, 17 Oct 2025 16:18:29 +0100 +Subject: net: ravb: Enforce descriptor type ordering +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Lad Prabhakar + +commit 5370c31e84b0e0999c7b5ff949f4e104def35584 upstream. + +Ensure the TX descriptor type fields are published in a safe order so the +DMA engine never begins processing a descriptor chain before all descriptor +fields are fully initialised. + +For multi-descriptor transmits the driver writes DT_FEND into the last +descriptor and DT_FSTART into the first. The DMA engine begins processing +when it observes DT_FSTART. Move the dma_wmb() barrier so it executes +immediately after DT_FEND and immediately before writing DT_FSTART +(and before DT_FSINGLE in the single-descriptor case). This guarantees +that all prior CPU writes to the descriptor memory are visible to the +device before DT_FSTART is seen. + +This avoids a situation where compiler/CPU reordering could publish +DT_FSTART ahead of DT_FEND or other descriptor fields, allowing the DMA to +start on a partially initialised chain and causing corrupted transmissions +or TX timeouts. Such a failure was observed on RZ/G2L with an RT kernel as +transmit queue timeouts and device resets. 
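+
+The requirement is the usual "publish last" pattern for DMA rings:
+complete every descriptor field first, then make the chain visible to
+the hardware with one final write, with a write barrier in between.
+Minimal sketch of the ordering (matching the hunk below):
+
+	last->die_dt = DT_FEND;		/* tail written first... */
+	dma_wmb();			/* ...all prior writes visible... */
+	first->die_dt = DT_FSTART;	/* ...before DMA may start */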
+ +Fixes: 2f45d1902acf ("ravb: minimize TX data copying") +Cc: stable@vger.kernel.org +Co-developed-by: Fabrizio Castro +Signed-off-by: Fabrizio Castro +Signed-off-by: Lad Prabhakar +Reviewed-by: Niklas Söderlund +Link: https://patch.msgid.link/20251017151830.171062-4-prabhakar.mahadev-lad.rj@bp.renesas.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/renesas/ravb_main.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/renesas/ravb_main.c ++++ b/drivers/net/ethernet/renesas/ravb_main.c +@@ -2210,13 +2210,25 @@ static netdev_tx_t ravb_start_xmit(struc + + skb_tx_timestamp(skb); + } +- /* Descriptor type must be set after all the above writes */ +- dma_wmb(); ++ + if (num_tx_desc > 1) { + desc->die_dt = DT_FEND; + desc--; ++ /* When using multi-descriptors, DT_FEND needs to get written ++ * before DT_FSTART, but the compiler may reorder the memory ++ * writes in an attempt to optimize the code. ++ * Use a dma_wmb() barrier to make sure DT_FEND and DT_FSTART ++ * are written exactly in the order shown in the code. ++ * This is particularly important for cases where the DMA engine ++ * is already running when we are running this code. If the DMA ++ * sees DT_FSTART without the corresponding DT_FEND it will enter ++ * an error condition. ++ */ ++ dma_wmb(); + desc->die_dt = DT_FSTART; + } else { ++ /* Descriptor type must be set after all the above writes */ ++ dma_wmb(); + desc->die_dt = DT_FSINGLE; + } + ravb_modify(ndev, TCCR, TCCR_TSRQ0 << q, TCCR_TSRQ0 << q); diff --git a/queue-6.17/net-ravb-ensure-memory-write-completes-before-ringing-tx-doorbell.patch b/queue-6.17/net-ravb-ensure-memory-write-completes-before-ringing-tx-doorbell.patch new file mode 100644 index 0000000000..69ff414d5b --- /dev/null +++ b/queue-6.17/net-ravb-ensure-memory-write-completes-before-ringing-tx-doorbell.patch @@ -0,0 +1,52 @@ +From 706136c5723626fcde8dd8f598a4dcd251e24927 Mon Sep 17 00:00:00 2001 +From: Lad Prabhakar +Date: Fri, 17 Oct 2025 16:18:30 +0100 +Subject: net: ravb: Ensure memory write completes before ringing TX doorbell +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Lad Prabhakar + +commit 706136c5723626fcde8dd8f598a4dcd251e24927 upstream. + +Add a final dma_wmb() barrier before triggering the transmit request +(TCCR_TSRQ) to ensure all descriptor and buffer writes are visible to +the DMA engine. + +According to the hardware manual, a read-back operation is required +before writing to the doorbell register to guarantee completion of +previous writes. Instead of performing a dummy read, a dma_wmb() is +used to both enforce the same ordering semantics on the CPU side and +also to ensure completion of writes. 
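+
+A sketch of the resulting doorbell sequence (as in the hunk below),
+used in place of the dummy read-back described by the manual:
+
+	/* all descriptor/buffer writes committed before the doorbell */
+	dma_wmb();
+	ravb_modify(ndev, TCCR, TCCR_TSRQ0 << q, TCCR_TSRQ0 << q);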
+ +Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") +Cc: stable@vger.kernel.org +Co-developed-by: Fabrizio Castro +Signed-off-by: Fabrizio Castro +Signed-off-by: Lad Prabhakar +Reviewed-by: Niklas Söderlund +Link: https://patch.msgid.link/20251017151830.171062-5-prabhakar.mahadev-lad.rj@bp.renesas.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/renesas/ravb_main.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/net/ethernet/renesas/ravb_main.c ++++ b/drivers/net/ethernet/renesas/ravb_main.c +@@ -2231,6 +2231,14 @@ static netdev_tx_t ravb_start_xmit(struc + dma_wmb(); + desc->die_dt = DT_FSINGLE; + } ++ ++ /* Before ringing the doorbell we need to make sure that the latest ++ * writes have been committed to memory, otherwise it could delay ++ * things until the doorbell is rang again. ++ * This is in replacement of the read operation mentioned in the HW ++ * manuals. ++ */ ++ dma_wmb(); + ravb_modify(ndev, TCCR, TCCR_TSRQ0 << q, TCCR_TSRQ0 << q); + + priv->cur_tx[q] += num_tx_desc; diff --git a/queue-6.17/net-stmmac-dwmac-rk-fix-disabling-set_clock_selection.patch b/queue-6.17/net-stmmac-dwmac-rk-fix-disabling-set_clock_selection.patch new file mode 100644 index 0000000000..6ec0101973 --- /dev/null +++ b/queue-6.17/net-stmmac-dwmac-rk-fix-disabling-set_clock_selection.patch @@ -0,0 +1,51 @@ +From 7f864458e9a6d2000b726d14b3d3a706ac92a3b0 Mon Sep 17 00:00:00 2001 +From: Sebastian Reichel +Date: Tue, 14 Oct 2025 17:49:34 +0200 +Subject: net: stmmac: dwmac-rk: Fix disabling set_clock_selection + +From: Sebastian Reichel + +commit 7f864458e9a6d2000b726d14b3d3a706ac92a3b0 upstream. + +On all platforms set_clock_selection() writes to a GRF register. This +requires certain clocks running and thus should happen before the +clocks are disabled. + +This has been noticed on RK3576 Sige5, which hangs during system suspend +when trying to suspend the second network interface. Note, that +suspending the first interface works, because the second device ensures +that the necessary clocks for the GRF are enabled. 
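+
+The fix mirrors the enable path in reverse: perform the GRF write while
+the clocks are still running, and only then gate them. Sketch of the
+corrected ordering (as in the hunk below):
+
+	/* the GRF write needs the clocks: deselect first... */
+	if (bsp_priv->ops && bsp_priv->ops->set_clock_selection)
+		bsp_priv->ops->set_clock_selection(bsp_priv,
+						   bsp_priv->clock_input, false);
+
+	/* ...then it is safe to gate everything */
+	clk_bulk_disable_unprepare(bsp_priv->num_clks, bsp_priv->clks);
+	clk_disable_unprepare(bsp_priv->clk_phy);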
+ +Cc: stable@vger.kernel.org +Fixes: 2f2b60a0ec28 ("net: ethernet: stmmac: dwmac-rk: Add gmac support for rk3588") +Signed-off-by: Sebastian Reichel +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20251014-rockchip-network-clock-fix-v1-1-c257b4afdf75@collabora.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +@@ -1448,14 +1448,15 @@ static int gmac_clk_enable(struct rk_pri + } + } else { + if (bsp_priv->clk_enabled) { ++ if (bsp_priv->ops && bsp_priv->ops->set_clock_selection) { ++ bsp_priv->ops->set_clock_selection(bsp_priv, ++ bsp_priv->clock_input, false); ++ } ++ + clk_bulk_disable_unprepare(bsp_priv->num_clks, + bsp_priv->clks); + clk_disable_unprepare(bsp_priv->clk_phy); + +- if (bsp_priv->ops && bsp_priv->ops->set_clock_selection) +- bsp_priv->ops->set_clock_selection(bsp_priv, +- bsp_priv->clock_input, false); +- + bsp_priv->clk_enabled = false; + } + } diff --git a/queue-6.17/net-usb-rtl8150-fix-frame-padding.patch b/queue-6.17/net-usb-rtl8150-fix-frame-padding.patch new file mode 100644 index 0000000000..e70dd4d295 --- /dev/null +++ b/queue-6.17/net-usb-rtl8150-fix-frame-padding.patch @@ -0,0 +1,54 @@ +From 75cea9860aa6b2350d90a8d78fed114d27c7eca2 Mon Sep 17 00:00:00 2001 +From: Michal Pecio +Date: Tue, 14 Oct 2025 20:35:28 +0200 +Subject: net: usb: rtl8150: Fix frame padding + +From: Michal Pecio + +commit 75cea9860aa6b2350d90a8d78fed114d27c7eca2 upstream. + +TX frames aren't padded and unknown memory is sent into the ether. + +Theoretically, it isn't even guaranteed that the extra memory exists +and can be sent out, which could cause further problems. In practice, +I found that plenty of tailroom exists in the skb itself (in my test +with ping at least) and skb_padto() easily succeeds, so use it here. + +In the event of -ENOMEM drop the frame like other drivers do. + +The use of one more padding byte instead of a USB zero-length packet +is retained to avoid regression. I have a dodgy Etron xHCI controller +which doesn't seem to support sending ZLPs at all. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Cc: stable@vger.kernel.org +Signed-off-by: Michal Pecio +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20251014203528.3f9783c4.michal.pecio@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/rtl8150.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +--- a/drivers/net/usb/rtl8150.c ++++ b/drivers/net/usb/rtl8150.c +@@ -685,9 +685,16 @@ static netdev_tx_t rtl8150_start_xmit(st + rtl8150_t *dev = netdev_priv(netdev); + int count, res; + ++ /* pad the frame and ensure terminating USB packet, datasheet 9.2.3 */ ++ count = max(skb->len, ETH_ZLEN); ++ if (count % 64 == 0) ++ count++; ++ if (skb_padto(skb, count)) { ++ netdev->stats.tx_dropped++; ++ return NETDEV_TX_OK; ++ } ++ + netif_stop_queue(netdev); +- count = (skb->len < 60) ? 60 : skb->len; +- count = (count & 0x3f) ? 
count : count + 1; + dev->tx_skb = skb; + usb_fill_bulk_urb(dev->tx_urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), + skb->data, count, write_bulk_callback, dev); diff --git a/queue-6.17/ocfs2-clear-extent-cache-after-moving-defragmenting-extents.patch b/queue-6.17/ocfs2-clear-extent-cache-after-moving-defragmenting-extents.patch new file mode 100644 index 0000000000..1278a03720 --- /dev/null +++ b/queue-6.17/ocfs2-clear-extent-cache-after-moving-defragmenting-extents.patch @@ -0,0 +1,62 @@ +From 78a63493f8e352296dbc7cb7b3f4973105e8679e Mon Sep 17 00:00:00 2001 +From: Deepanshu Kartikey +Date: Thu, 9 Oct 2025 21:19:03 +0530 +Subject: ocfs2: clear extent cache after moving/defragmenting extents + +From: Deepanshu Kartikey + +commit 78a63493f8e352296dbc7cb7b3f4973105e8679e upstream. + +The extent map cache can become stale when extents are moved or +defragmented, causing subsequent operations to see outdated extent flags. +This triggers a BUG_ON in ocfs2_refcount_cal_cow_clusters(). + +The problem occurs when: +1. copy_file_range() creates a reflinked extent with OCFS2_EXT_REFCOUNTED +2. ioctl(FITRIM) triggers ocfs2_move_extents() +3. __ocfs2_move_extents_range() reads and caches the extent (flags=0x2) +4. ocfs2_move_extent()/ocfs2_defrag_extent() calls __ocfs2_move_extent() + which clears OCFS2_EXT_REFCOUNTED flag on disk (flags=0x0) +5. The extent map cache is not invalidated after the move +6. Later write() operations read stale cached flags (0x2) but disk has + updated flags (0x0), causing a mismatch +7. BUG_ON(!(rec->e_flags & OCFS2_EXT_REFCOUNTED)) triggers + +Fix by clearing the extent map cache after each extent move/defrag +operation in __ocfs2_move_extents_range(). This ensures subsequent +operations read fresh extent data from disk. + +Link: https://lore.kernel.org/all/20251009142917.517229-1-kartikey406@gmail.com/T/ +Link: https://lkml.kernel.org/r/20251009154903.522339-1-kartikey406@gmail.com +Fixes: 53069d4e7695 ("Ocfs2/move_extents: move/defrag extents within a certain range.") +Signed-off-by: Deepanshu Kartikey +Reported-by: syzbot+6fdd8fa3380730a4b22c@syzkaller.appspotmail.com +Tested-by: syzbot+6fdd8fa3380730a4b22c@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?id=2959889e1f6e216585ce522f7e8bc002b46ad9e7 +Reviewed-by: Mark Fasheh +Reviewed-by: Joseph Qi +Cc: Joel Becker +Cc: Junxiao Bi +Cc: Changwei Ge +Cc: Jun Piao +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/ocfs2/move_extents.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/fs/ocfs2/move_extents.c ++++ b/fs/ocfs2/move_extents.c +@@ -867,6 +867,11 @@ static int __ocfs2_move_extents_range(st + mlog_errno(ret); + goto out; + } ++ /* ++ * Invalidate extent cache after moving/defragging to prevent ++ * stale cached data with outdated extent flags. 
++ */ ++ ocfs2_extent_map_trunc(inode, cpos); + + context->clusters_moved += alloc_size; + next: diff --git a/queue-6.17/platform-x86-alienware-wmi-wmax-add-awcc-support-to-dell-g15-5530.patch b/queue-6.17/platform-x86-alienware-wmi-wmax-add-awcc-support-to-dell-g15-5530.patch new file mode 100644 index 0000000000..8948fc2c96 --- /dev/null +++ b/queue-6.17/platform-x86-alienware-wmi-wmax-add-awcc-support-to-dell-g15-5530.patch @@ -0,0 +1,42 @@ +From 34cbd6e07fddf36e186c8bf26a456fb7f50af44e Mon Sep 17 00:00:00 2001 +From: tr1x_em +Date: Thu, 25 Sep 2025 09:10:03 +0530 +Subject: platform/x86: alienware-wmi-wmax: Add AWCC support to Dell G15 5530 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: tr1x_em + +commit 34cbd6e07fddf36e186c8bf26a456fb7f50af44e upstream. + +Makes alienware-wmi load on G15 5530 by default + +Cc: stable@vger.kernel.org +Signed-off-by: Saumya +Reviewed-by: Kurt Borja +Link: https://patch.msgid.link/20250925034010.31414-1-admin@trix.is-a.dev +Reviewed-by: Ilpo Järvinen +Signed-off-by: Ilpo Järvinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/x86/dell/alienware-wmi-wmax.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/platform/x86/dell/alienware-wmi-wmax.c ++++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c +@@ -210,6 +210,14 @@ static const struct dmi_system_id awcc_d + .driver_data = &g_series_quirks, + }, + { ++ .ident = "Dell Inc. G15 5530", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "Dell G15 5530"), ++ }, ++ .driver_data = &g_series_quirks, ++ }, ++ { + .ident = "Dell Inc. G16 7630", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), diff --git a/queue-6.17/platform-x86-alienware-wmi-wmax-fix-null-pointer-dereference-in-sleep-handlers.patch b/queue-6.17/platform-x86-alienware-wmi-wmax-fix-null-pointer-dereference-in-sleep-handlers.patch new file mode 100644 index 0000000000..497e26ec2e --- /dev/null +++ b/queue-6.17/platform-x86-alienware-wmi-wmax-fix-null-pointer-dereference-in-sleep-handlers.patch @@ -0,0 +1,48 @@ +From a49c4d48c3b60926e6a8cec217bf95aa65388ecc Mon Sep 17 00:00:00 2001 +From: Kurt Borja +Date: Tue, 14 Oct 2025 05:07:27 -0500 +Subject: platform/x86: alienware-wmi-wmax: Fix NULL pointer dereference in sleep handlers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Kurt Borja + +commit a49c4d48c3b60926e6a8cec217bf95aa65388ecc upstream. + +Devices without the AWCC interface don't initialize `awcc`. Add a check +before dereferencing it in sleep handlers. 
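+
+A guard of this shape is easy to exercise in isolation. The sketch below
+is not the driver code, just a minimal user-space model of the same
+pattern (all names hypothetical): the optional state may never have been
+allocated, so the handler must test the pointer itself first:
+
+	#include <stdio.h>
+	#include <stdlib.h>
+
+	/* Stand-in for the driver's optional AWCC state. */
+	struct awcc_state {
+		int hwmon;
+	};
+
+	/* Stays NULL when the interface is absent, as on those devices. */
+	static struct awcc_state *awcc;
+
+	static int wmax_suspend_sketch(void)
+	{
+		/* Check the optional state before dereferencing it. */
+		if (awcc && awcc->hwmon)
+			printf("suspending hwmon support\n");
+		return 0;
+	}
+
+	int main(void)
+	{
+		wmax_suspend_sketch();	/* awcc == NULL: must not crash */
+		awcc = calloc(1, sizeof(*awcc));
+		if (!awcc)
+			return 1;
+		awcc->hwmon = 1;
+		wmax_suspend_sketch();	/* now takes the hwmon path */
+		free(awcc);
+		return 0;
+	}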
+ +Cc: stable@vger.kernel.org +Reported-by: Gal Hammer +Tested-by: Gal Hammer +Fixes: 07ac275981b1 ("platform/x86: alienware-wmi-wmax: Add support for manual fan control") +Signed-off-by: Kurt Borja +Link: https://patch.msgid.link/20251014-sleep-fix-v3-1-b5cb58da4638@gmail.com +Reviewed-by: Ilpo Järvinen +Signed-off-by: Ilpo Järvinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/x86/dell/alienware-wmi-wmax.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/platform/x86/dell/alienware-wmi-wmax.c ++++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c +@@ -1647,7 +1647,7 @@ static int wmax_wmi_probe(struct wmi_dev + + static int wmax_wmi_suspend(struct device *dev) + { +- if (awcc->hwmon) ++ if (awcc && awcc->hwmon) + awcc_hwmon_suspend(dev); + + return 0; +@@ -1655,7 +1655,7 @@ static int wmax_wmi_suspend(struct devic + + static int wmax_wmi_resume(struct device *dev) + { +- if (awcc->hwmon) ++ if (awcc && awcc->hwmon) + awcc_hwmon_resume(dev); + + return 0; diff --git a/queue-6.17/revert-cpuidle-menu-avoid-discarding-useful-information.patch b/queue-6.17/revert-cpuidle-menu-avoid-discarding-useful-information.patch new file mode 100644 index 0000000000..41f89c5019 --- /dev/null +++ b/queue-6.17/revert-cpuidle-menu-avoid-discarding-useful-information.patch @@ -0,0 +1,78 @@ +From 10fad4012234a7dea621ae17c0c9486824f645a0 Mon Sep 17 00:00:00 2001 +From: "Rafael J. Wysocki" +Date: Sat, 18 Oct 2025 14:27:15 +0200 +Subject: Revert "cpuidle: menu: Avoid discarding useful information" + +From: Rafael J. Wysocki + +commit 10fad4012234a7dea621ae17c0c9486824f645a0 upstream. + +It is reported that commit 85975daeaa4d ("cpuidle: menu: Avoid discarding +useful information") led to a performance regression on Intel Jasper Lake +systems because it reduced the time spent by CPUs in idle state C7 which +is correlated to the maximum frequency the CPUs can get to because of an +average running power limit [1]. + +Before that commit, get_typical_interval() would have returned UINT_MAX +whenever it had been unable to make a high-confidence prediction which +had led to selecting the deepest available idle state too often and +both power and performance had been inadequate as a result of that on +some systems. However, this had not been a problem on systems with +relatively aggressive average running power limits, like the Jasper Lake +systems in question, because on those systems it was compensated by the +ability to run CPUs faster. + +It was addressed by causing get_typical_interval() to return a number +based on the recent idle duration information available to it even if it +could not make a high-confidence prediction, but that clearly did not +take the possible correlation between idle power and available CPU +capacity into account. + +For this reason, revert most of the changes made by commit 85975daeaa4d, +except for one cosmetic cleanup, and add a comment explaining the +rationale for returning UINT_MAX from get_typical_interval() when it +is unable to make a high-confidence prediction. + +Fixes: 85975daeaa4d ("cpuidle: menu: Avoid discarding useful information") +Closes: https://lore.kernel.org/linux-pm/36iykr223vmcfsoysexug6s274nq2oimcu55ybn6ww4il3g3cv@cohflgdbpnq7/ [1] +Reported-by: Sergey Senozhatsky +Cc: All applicable +Signed-off-by: Rafael J. 
Wysocki +Link: https://patch.msgid.link/3663603.iIbC2pHGDl@rafael.j.wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cpuidle/governors/menu.c | 21 +++++++++------------ + 1 file changed, 9 insertions(+), 12 deletions(-) + +--- a/drivers/cpuidle/governors/menu.c ++++ b/drivers/cpuidle/governors/menu.c +@@ -188,20 +188,17 @@ again: + * + * This can deal with workloads that have long pauses interspersed + * with sporadic activity with a bunch of short pauses. ++ * ++ * However, if the number of remaining samples is too small to exclude ++ * any more outliers, allow the deepest available idle state to be ++ * selected because there are systems where the time spent by CPUs in ++ * deep idle states is correlated to the maximum frequency the CPUs ++ * can get to. On those systems, shallow idle states should be avoided ++ * unless there is a clear indication that the given CPU is most likley ++ * going to be woken up shortly. + */ +- if (divisor * 4 <= INTERVALS * 3) { +- /* +- * If there are sufficiently many data points still under +- * consideration after the outliers have been eliminated, +- * returning without a prediction would be a mistake because it +- * is likely that the next interval will not exceed the current +- * maximum, so return the latter in that case. +- */ +- if (divisor >= INTERVALS / 2) +- return max; +- ++ if (divisor * 4 <= INTERVALS * 3) + return UINT_MAX; +- } + + /* Update the thresholds for the next round. */ + if (avg - min > max - avg) diff --git a/queue-6.17/riscv-cpufeature-avoid-uninitialized-variable-in-has_thead_homogeneous_vlenb.patch b/queue-6.17/riscv-cpufeature-avoid-uninitialized-variable-in-has_thead_homogeneous_vlenb.patch new file mode 100644 index 0000000000..209dedce5f --- /dev/null +++ b/queue-6.17/riscv-cpufeature-avoid-uninitialized-variable-in-has_thead_homogeneous_vlenb.patch @@ -0,0 +1,44 @@ +From 2dc99ea2727640b2fe12f9aa0e38ea2fc3cbb92d Mon Sep 17 00:00:00 2001 +From: Paul Walmsley +Date: Sat, 18 Oct 2025 00:31:11 -0600 +Subject: riscv: cpufeature: avoid uninitialized variable in has_thead_homogeneous_vlenb() + +From: Paul Walmsley + +commit 2dc99ea2727640b2fe12f9aa0e38ea2fc3cbb92d upstream. + +In has_thead_homogeneous_vlenb(), smatch detected that the vlenb variable +could be used while uninitialized. It appears that this could happen if +no CPUs described in DT have the "thead,vlenb" property. + +Fix by initializing vlenb to 0, which will keep thead_vlenb_of set to 0 +(as it was statically initialized). This in turn will cause +riscv_v_setup_vsize() to fall back to CSR probing - the desired result if +thead,vlenb isn't provided in the DT data. + +While here, fix a nearby comment typo. 
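+
+As a sketch of the bug class (user-space C with hypothetical helpers, not
+the cpufeature code itself): a variable assigned only inside a loop body
+is garbage whenever no iteration takes that branch, so the zero
+initializer is what makes the "no property anywhere" case well defined:
+
+	#include <stdio.h>
+
+	/* Hypothetical DT lookup: fills *val and returns 0 only when the
+	 * "thead,vlenb" property exists for this CPU. */
+	static int read_vlenb(int cpu, unsigned int *val)
+	{
+		(void)cpu;
+		(void)val;
+		return -1;	/* simulate: no CPU carries the property */
+	}
+
+	static unsigned int probe_common_vlenb(int ncpus)
+	{
+		unsigned int prev_vlenb = 0;
+		unsigned int vlenb = 0;	/* without "= 0", the return below
+					 * reads an uninitialized value when
+					 * no CPU has the property */
+
+		for (int cpu = 0; cpu < ncpus; cpu++) {
+			unsigned int v;
+
+			if (read_vlenb(cpu, &v))
+				continue;
+			if (prev_vlenb && v != prev_vlenb)
+				return 0;	/* heterogeneous: reject */
+			prev_vlenb = v;
+			vlenb = v;
+		}
+
+		return vlenb;	/* 0 => fall back to CSR probing */
+	}
+
+	int main(void)
+	{
+		printf("common vlenb: %u\n", probe_common_vlenb(4));
+		return 0;
+	}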
+ +Cc: stable@vger.kernel.org +Cc: Charlie Jenkins +Fixes: 377be47f90e41 ("riscv: vector: Use vlenb from DT for thead") +Signed-off-by: Paul Walmsley +Link: https://lore.kernel.org/r/22674afb-2fe8-2a83-1818-4c37bd554579@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/kernel/cpufeature.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/riscv/kernel/cpufeature.c ++++ b/arch/riscv/kernel/cpufeature.c +@@ -932,9 +932,9 @@ static int has_thead_homogeneous_vlenb(v + { + int cpu; + u32 prev_vlenb = 0; +- u32 vlenb; ++ u32 vlenb = 0; + +- /* Ignore thead,vlenb property if xtheavector is not enabled in the kernel */ ++ /* Ignore thead,vlenb property if xtheadvector is not enabled in the kernel */ + if (!IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) + return 0; + diff --git a/queue-6.17/riscv-hwprobe-fix-stale-vdso-data-for-late-initialized-keys-at-boot.patch b/queue-6.17/riscv-hwprobe-fix-stale-vdso-data-for-late-initialized-keys-at-boot.patch new file mode 100644 index 0000000000..960244ffc6 --- /dev/null +++ b/queue-6.17/riscv-hwprobe-fix-stale-vdso-data-for-late-initialized-keys-at-boot.patch @@ -0,0 +1,220 @@ +From 5d15d2ad36b0f7afab83ca9fc8a2a6e60cbe54c4 Mon Sep 17 00:00:00 2001 +From: Jingwei Wang +Date: Mon, 11 Aug 2025 22:20:06 +0800 +Subject: riscv: hwprobe: Fix stale vDSO data for late-initialized keys at boot + +From: Jingwei Wang + +commit 5d15d2ad36b0f7afab83ca9fc8a2a6e60cbe54c4 upstream. + +The hwprobe vDSO data for some keys, like MISALIGNED_VECTOR_PERF, +is determined by an asynchronous kthread. This can create a race +condition where the kthread finishes after the vDSO data has +already been populated, causing userspace to read stale values. + +To fix this race, a new 'ready' flag is added to the vDSO data, +initialized to 'false' during arch_initcall_sync. This flag is +checked by both the vDSO's user-space code and the riscv_hwprobe +syscall. The syscall serves as a one-time gate, using a completion +to wait for any pending probes before populating the data and +setting the flag to 'true', thus ensuring userspace reads fresh +values on its first request. 
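+
+As a rough sketch of the pattern (user-space C11 atomics standing in for
+the kernel's smp_wmb()/vDSO machinery; all names here are hypothetical):
+the producer publishes the data and only then sets the gate with release
+semantics, and the consumer either sees the gate and reads fresh data or
+falls back to the slow path:
+
+	#include <pthread.h>
+	#include <stdatomic.h>
+	#include <stdio.h>
+
+	/* Stand-in for the shared vDSO arch data. */
+	static unsigned long long value;	/* the hwprobe answer */
+	static atomic_bool ready;		/* gate guarding "value" */
+
+	/* Kernel side: fill in the data, then release-store the gate. */
+	static void *producer(void *arg)
+	{
+		value = 42;
+		atomic_store_explicit(&ready, true, memory_order_release);
+		return arg;
+	}
+
+	/* vDSO side: take the fast path only when the gate is set. */
+	static unsigned long long consumer(void)
+	{
+		if (!atomic_load_explicit(&ready, memory_order_acquire))
+			return 0;	/* slow path: ask the kernel instead */
+		return value;		/* acquire/release pairing => fresh */
+	}
+
+	int main(void)
+	{
+		pthread_t t;
+
+		pthread_create(&t, NULL, producer, NULL);
+		printf("read: %llu\n", consumer()); /* fallback or 42, never stale */
+		pthread_join(t, NULL);
+		return 0;
+	}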
+ +Reported-by: Tsukasa OI +Closes: https://lore.kernel.org/linux-riscv/760d637b-b13b-4518-b6bf-883d55d44e7f@irq.a4lg.com/ +Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe") +Cc: Palmer Dabbelt +Cc: Alexandre Ghiti +Cc: Olof Johansson +Cc: stable@vger.kernel.org +Reviewed-by: Alexandre Ghiti +Co-developed-by: Palmer Dabbelt +Signed-off-by: Palmer Dabbelt +Signed-off-by: Jingwei Wang +Link: https://lore.kernel.org/r/20250811142035.105820-1-wangjingwei@iscas.ac.cn +[pjw@kernel.org: fix checkpatch issues] +Signed-off-by: Paul Walmsley +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/include/asm/hwprobe.h | 7 ++ + arch/riscv/include/asm/vdso/arch_data.h | 6 ++ + arch/riscv/kernel/sys_hwprobe.c | 70 ++++++++++++++++++++++++----- + arch/riscv/kernel/unaligned_access_speed.c | 9 ++- + arch/riscv/kernel/vdso/hwprobe.c | 2 + 5 files changed, 79 insertions(+), 15 deletions(-) + +--- a/arch/riscv/include/asm/hwprobe.h ++++ b/arch/riscv/include/asm/hwprobe.h +@@ -41,4 +41,11 @@ static inline bool riscv_hwprobe_pair_cm + return pair->value == other_pair->value; + } + ++#ifdef CONFIG_MMU ++void riscv_hwprobe_register_async_probe(void); ++void riscv_hwprobe_complete_async_probe(void); ++#else ++static inline void riscv_hwprobe_register_async_probe(void) {} ++static inline void riscv_hwprobe_complete_async_probe(void) {} ++#endif + #endif +--- a/arch/riscv/include/asm/vdso/arch_data.h ++++ b/arch/riscv/include/asm/vdso/arch_data.h +@@ -12,6 +12,12 @@ struct vdso_arch_data { + + /* Boolean indicating all CPUs have the same static hwprobe values. */ + __u8 homogeneous_cpus; ++ ++ /* ++ * A gate to check and see if the hwprobe data is actually ready, as ++ * probing is deferred to avoid boot slowdowns. ++ */ ++ __u8 ready; + }; + + #endif /* __RISCV_ASM_VDSO_ARCH_DATA_H */ +--- a/arch/riscv/kernel/sys_hwprobe.c ++++ b/arch/riscv/kernel/sys_hwprobe.c +@@ -5,6 +5,9 @@ + * more details. + */ + #include ++#include ++#include ++#include + #include + #include + #include +@@ -450,28 +453,32 @@ static int hwprobe_get_cpus(struct riscv + return 0; + } + +-static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, +- size_t pair_count, size_t cpusetsize, +- unsigned long __user *cpus_user, +- unsigned int flags) +-{ +- if (flags & RISCV_HWPROBE_WHICH_CPUS) +- return hwprobe_get_cpus(pairs, pair_count, cpusetsize, +- cpus_user, flags); ++#ifdef CONFIG_MMU + +- return hwprobe_get_values(pairs, pair_count, cpusetsize, +- cpus_user, flags); ++static DECLARE_COMPLETION(boot_probes_done); ++static atomic_t pending_boot_probes = ATOMIC_INIT(1); ++ ++void riscv_hwprobe_register_async_probe(void) ++{ ++ atomic_inc(&pending_boot_probes); + } + +-#ifdef CONFIG_MMU ++void riscv_hwprobe_complete_async_probe(void) ++{ ++ if (atomic_dec_and_test(&pending_boot_probes)) ++ complete(&boot_probes_done); ++} + +-static int __init init_hwprobe_vdso_data(void) ++static int complete_hwprobe_vdso_data(void) + { + struct vdso_arch_data *avd = vdso_k_arch_data; + u64 id_bitsmash = 0; + struct riscv_hwprobe pair; + int key; + ++ if (unlikely(!atomic_dec_and_test(&pending_boot_probes))) ++ wait_for_completion(&boot_probes_done); ++ + /* + * Initialize vDSO data with the answers for the "all CPUs" case, to + * save a syscall in the common case. +@@ -499,13 +506,52 @@ static int __init init_hwprobe_vdso_data + * vDSO should defer to the kernel for exotic cpu masks. + */ + avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1; ++ ++ /* ++ * Make sure all the VDSO values are visible before we look at them. 
++ * This pairs with the implicit "no speculativly visible accesses" ++ * barrier in the VDSO hwprobe code. ++ */ ++ smp_wmb(); ++ avd->ready = true; ++ return 0; ++} ++ ++static int __init init_hwprobe_vdso_data(void) ++{ ++ struct vdso_arch_data *avd = vdso_k_arch_data; ++ ++ /* ++ * Prevent the vDSO cached values from being used, as they're not ready ++ * yet. ++ */ ++ avd->ready = false; + return 0; + } + + arch_initcall_sync(init_hwprobe_vdso_data); + ++#else ++ ++static int complete_hwprobe_vdso_data(void) { return 0; } ++ + #endif /* CONFIG_MMU */ + ++static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, ++ size_t pair_count, size_t cpusetsize, ++ unsigned long __user *cpus_user, ++ unsigned int flags) ++{ ++ DO_ONCE_SLEEPABLE(complete_hwprobe_vdso_data); ++ ++ if (flags & RISCV_HWPROBE_WHICH_CPUS) ++ return hwprobe_get_cpus(pairs, pair_count, cpusetsize, ++ cpus_user, flags); ++ ++ return hwprobe_get_values(pairs, pair_count, cpusetsize, ++ cpus_user, flags); ++} ++ + SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs, + size_t, pair_count, size_t, cpusetsize, unsigned long __user *, + cpus, unsigned int, flags) +--- a/arch/riscv/kernel/unaligned_access_speed.c ++++ b/arch/riscv/kernel/unaligned_access_speed.c +@@ -379,6 +379,7 @@ free: + static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) + { + schedule_on_each_cpu(check_vector_unaligned_access); ++ riscv_hwprobe_complete_async_probe(); + + return 0; + } +@@ -473,8 +474,12 @@ static int __init check_unaligned_access + per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param; + } else if (!check_vector_unaligned_access_emulated_all_cpus() && + IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { +- kthread_run(vec_check_unaligned_access_speed_all_cpus, +- NULL, "vec_check_unaligned_access_speed_all_cpus"); ++ riscv_hwprobe_register_async_probe(); ++ if (IS_ERR(kthread_run(vec_check_unaligned_access_speed_all_cpus, ++ NULL, "vec_check_unaligned_access_speed_all_cpus"))) { ++ pr_warn("Failed to create vec_unalign_check kthread\n"); ++ riscv_hwprobe_complete_async_probe(); ++ } + } + + /* +--- a/arch/riscv/kernel/vdso/hwprobe.c ++++ b/arch/riscv/kernel/vdso/hwprobe.c +@@ -27,7 +27,7 @@ static int riscv_vdso_get_values(struct + * homogeneous, then this function can handle requests for arbitrary + * masks. + */ +- if ((flags != 0) || (!all_cpus && !avd->homogeneous_cpus)) ++ if (flags != 0 || (!all_cpus && !avd->homogeneous_cpus) || unlikely(!avd->ready)) + return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags); + + /* This is something we can handle, fill out the pairs. */ diff --git a/queue-6.17/rust-device-fix-device-context-of-device-parent.patch b/queue-6.17/rust-device-fix-device-context-of-device-parent.patch new file mode 100644 index 0000000000..f64588aa3c --- /dev/null +++ b/queue-6.17/rust-device-fix-device-context-of-device-parent.patch @@ -0,0 +1,76 @@ +From cfec502b3d091ff7c24df6ccf8079470584315a0 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Thu, 16 Oct 2025 15:31:44 +0200 +Subject: rust: device: fix device context of Device::parent() + +From: Danilo Krummrich + +commit cfec502b3d091ff7c24df6ccf8079470584315a0 upstream. + +Regardless of the DeviceContext of a device, we can't give any +guarantees about the DeviceContext of its parent device. + +This is very subtle, since it's only caused by a simple typo, i.e. + + Self::from_raw(parent) + +which preserves the DeviceContext in this case, vs. 
+ + Device::from_raw(parent) + +which discards the DeviceContext. + +(I should have noticed it doing the correct thing in auxiliary::Device +subsequently, but somehow missed it.) + +Hence, fix both Device::parent() and auxiliary::Device::parent(). + +Cc: stable@vger.kernel.org +Fixes: a4c9f71e3440 ("rust: device: implement Device::parent()") +Reviewed-by: Alice Ryhl +Reviewed-by: Alexandre Courbot +Acked-by: Greg Kroah-Hartman +Signed-off-by: Danilo Krummrich +Signed-off-by: Greg Kroah-Hartman +--- + rust/kernel/auxiliary.rs | 8 +------- + rust/kernel/device.rs | 4 ++-- + 2 files changed, 3 insertions(+), 9 deletions(-) + +--- a/rust/kernel/auxiliary.rs ++++ b/rust/kernel/auxiliary.rs +@@ -217,13 +217,7 @@ impl Device< + + /// Returns a reference to the parent [`device::Device`], if any. + pub fn parent(&self) -> Option<&device::Device> { +- let ptr: *const Self = self; +- // CAST: `Device` types are transparent to each other. +- let ptr: *const Device = ptr.cast(); +- // SAFETY: `ptr` was derived from `&self`. +- let this = unsafe { &*ptr }; +- +- this.as_ref().parent() ++ self.as_ref().parent() + } + } + +--- a/rust/kernel/device.rs ++++ b/rust/kernel/device.rs +@@ -250,7 +250,7 @@ impl Device { + + /// Returns a reference to the parent device, if any. + #[cfg_attr(not(CONFIG_AUXILIARY_BUS), expect(dead_code))] +- pub(crate) fn parent(&self) -> Option<&Self> { ++ pub(crate) fn parent(&self) -> Option<&Device> { + // SAFETY: + // - By the type invariant `self.as_raw()` is always valid. + // - The parent device is only ever set at device creation. +@@ -263,7 +263,7 @@ impl Device { + // - Since `parent` is not NULL, it must be a valid pointer to a `struct device`. + // - `parent` is valid for the lifetime of `self`, since a `struct device` holds a + // reference count of its parent. +- Some(unsafe { Self::from_raw(parent) }) ++ Some(unsafe { Device::from_raw(parent) }) + } + } + diff --git a/queue-6.17/rv-fully-convert-enabled_monitors-to-use-list_head-as-iterator.patch b/queue-6.17/rv-fully-convert-enabled_monitors-to-use-list_head-as-iterator.patch new file mode 100644 index 0000000000..790a1ea899 --- /dev/null +++ b/queue-6.17/rv-fully-convert-enabled_monitors-to-use-list_head-as-iterator.patch @@ -0,0 +1,76 @@ +From 103541e6a5854b08a25e4caa61e990af1009a52e Mon Sep 17 00:00:00 2001 +From: Nam Cao +Date: Thu, 2 Oct 2025 08:22:35 +0000 +Subject: rv: Fully convert enabled_monitors to use list_head as iterator + +From: Nam Cao + +commit 103541e6a5854b08a25e4caa61e990af1009a52e upstream. + +The callbacks in enabled_monitors_seq_ops are inconsistent. Some treat the +iterator as struct rv_monitor *, while others treat the iterator as struct +list_head *. + +This causes a wrong type cast and crashes the system as reported by Nathan. + +Convert everything to use struct list_head * as iterator. This also makes +enabled_monitors consistent with available_monitors. 
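+
+The contract being restored is that ->start and ->next trade cursors of
+one agreed type. A minimal user-space model of that contract (list_head
+lookalike and all names illustrative, not the rv code):
+
+	#include <stddef.h>
+	#include <stdio.h>
+
+	struct list_head { struct list_head *next; };
+
+	#define container_of(ptr, type, member) \
+		((type *)((char *)(ptr) - offsetof(type, member)))
+
+	struct monitor {
+		const char *name;
+		int enabled;
+		struct list_head list;
+	};
+
+	static struct monitor m2 = { "wwnr", 1, { NULL } };
+	static struct monitor m1 = { "wip",  0, { &m2.list } };
+	static struct list_head monitors = { &m1.list };
+
+	/* Both helpers consistently trade struct list_head * cursors:
+	 * whatever start() hands out is exactly what next() receives. */
+	static struct list_head *enabled_next(struct list_head *p)
+	{
+		for (p = p->next; p; p = p->next)
+			if (container_of(p, struct monitor, list)->enabled)
+				return p;
+		return NULL;
+	}
+
+	static struct list_head *enabled_start(void)
+	{
+		return enabled_next(&monitors);	/* the head is not an entry */
+	}
+
+	int main(void)
+	{
+		for (struct list_head *p = enabled_start(); p;
+		     p = enabled_next(p))
+			printf("%s\n",
+			       container_of(p, struct monitor, list)->name);
+		return 0;
+	}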
+
+Fixes: de090d1ccae1 ("rv: Fix wrong type cast in enabled_monitors_next()")
+Reported-by: Nathan Chancellor
+Closes: https://lore.kernel.org/linux-trace-kernel/20250923002004.GA2836051@ax162/
+Signed-off-by: Nam Cao
+Cc: stable@vger.kernel.org
+Reviewed-by: Gabriele Monaco
+Link: https://lore.kernel.org/r/20251002082235.973099-1-namcao@linutronix.de
+Signed-off-by: Gabriele Monaco
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/trace/rv/rv.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c
+index 48338520376f..43e9ea473cda 100644
+--- a/kernel/trace/rv/rv.c
++++ b/kernel/trace/rv/rv.c
+@@ -501,7 +501,7 @@ static void *enabled_monitors_next(struct seq_file *m, void *p, loff_t *pos)
+ 
+ 	list_for_each_entry_continue(mon, &rv_monitors_list, list) {
+ 		if (mon->enabled)
+-			return mon;
++			return &mon->list;
+ 	}
+ 
+ 	return NULL;
+@@ -509,7 +509,7 @@ static void *enabled_monitors_next(struct seq_file *m, void *p, loff_t *pos)
+ 
+ static void *enabled_monitors_start(struct seq_file *m, loff_t *pos)
+ {
+-	struct rv_monitor *mon;
++	struct list_head *head;
+ 	loff_t l;
+ 
+ 	mutex_lock(&rv_interface_lock);
+@@ -517,15 +517,15 @@ static void *enabled_monitors_start(struct seq_file *m, loff_t *pos)
+ 	if (list_empty(&rv_monitors_list))
+ 		return NULL;
+ 
+-	mon = list_entry(&rv_monitors_list, struct rv_monitor, list);
++	head = &rv_monitors_list;
+ 
+ 	for (l = 0; l <= *pos; ) {
+-		mon = enabled_monitors_next(m, mon, &l);
+-		if (!mon)
++		head = enabled_monitors_next(m, head, &l);
++		if (!head)
+ 			break;
+ 	}
+ 
+-	return mon;
++	return head;
+ }
+ 
+ /*
+-- 
+2.51.1
+
diff --git a/queue-6.17/rv-make-rtapp-pagefault-monitor-depends-on-config_mmu.patch b/queue-6.17/rv-make-rtapp-pagefault-monitor-depends-on-config_mmu.patch
new file mode 100644
index 0000000000..5587251d8e
--- /dev/null
+++ b/queue-6.17/rv-make-rtapp-pagefault-monitor-depends-on-config_mmu.patch
@@ -0,0 +1,43 @@
+From 3d62f95bd8450cebb4a4741bf83949cd54edd4a3 Mon Sep 17 00:00:00 2001
+From: Nam Cao
+Date: Thu, 2 Oct 2025 08:23:17 +0000
+Subject: rv: Make rtapp/pagefault monitor depends on CONFIG_MMU
+
+From: Nam Cao
+
+commit 3d62f95bd8450cebb4a4741bf83949cd54edd4a3 upstream.
+
+There is no page fault without MMU. Compiling the rtapp/pagefault monitor
+without CONFIG_MMU fails as page fault tracepoints' definitions are not
+available.
+
+Make the rtapp/pagefault monitor depend on CONFIG_MMU.
+ +Fixes: 9162620eb604 ("rv: Add rtapp_pagefault monitor") +Signed-off-by: Nam Cao +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202509260455.6Z9Vkty4-lkp@intel.com/ +Cc: stable@vger.kernel.org +Reviewed-by: Gabriele Monaco +Link: https://lore.kernel.org/r/20251002082317.973839-1-namcao@linutronix.de +Signed-off-by: Gabriele Monaco +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/rv/monitors/pagefault/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/trace/rv/monitors/pagefault/Kconfig b/kernel/trace/rv/monitors/pagefault/Kconfig +index 5e16625f1653..0e013f00c33b 100644 +--- a/kernel/trace/rv/monitors/pagefault/Kconfig ++++ b/kernel/trace/rv/monitors/pagefault/Kconfig +@@ -5,6 +5,7 @@ config RV_MON_PAGEFAULT + select RV_LTL_MONITOR + depends on RV_MON_RTAPP + depends on X86 || RISCV ++ depends on MMU + default y + select LTL_MON_EVENTS_ID + bool "pagefault monitor" +-- +2.51.1 + diff --git a/queue-6.17/selftests-mptcp-join-mark-delete-re-add-signal-as-skipped-if-not-supported.patch b/queue-6.17/selftests-mptcp-join-mark-delete-re-add-signal-as-skipped-if-not-supported.patch new file mode 100644 index 0000000000..ac6435613d --- /dev/null +++ b/queue-6.17/selftests-mptcp-join-mark-delete-re-add-signal-as-skipped-if-not-supported.patch @@ -0,0 +1,36 @@ +From c3496c052ac36ea98ec4f8e95ae6285a425a2457 Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Mon, 20 Oct 2025 22:53:29 +0200 +Subject: selftests: mptcp: join: mark 'delete re-add signal' as skipped if not supported + +From: Matthieu Baerts (NGI0) + +commit c3496c052ac36ea98ec4f8e95ae6285a425a2457 upstream. + +The call to 'continue_if' was missing: it properly marks a subtest as +'skipped' if the attached condition is not valid. + +Without that, the test is wrongly marked as passed on older kernels. + +Fixes: b5e2fb832f48 ("selftests: mptcp: add explicit test case for remove/readd") +Cc: stable@vger.kernel.org +Reviewed-by: Geliang Tang +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20251020-net-mptcp-c-flag-late-add-addr-v1-4-8207030cb0e8@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh +@@ -3852,7 +3852,7 @@ endpoint_tests() + + # remove and re-add + if reset_with_events "delete re-add signal" && +- mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then ++ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=0 + pm_nl_set_limits $ns1 0 3 + pm_nl_set_limits $ns2 3 3 diff --git a/queue-6.17/selftests-mptcp-join-mark-flush-re-add-as-skipped-if-not-supported.patch b/queue-6.17/selftests-mptcp-join-mark-flush-re-add-as-skipped-if-not-supported.patch new file mode 100644 index 0000000000..97d6aa3626 --- /dev/null +++ b/queue-6.17/selftests-mptcp-join-mark-flush-re-add-as-skipped-if-not-supported.patch @@ -0,0 +1,36 @@ +From d68460bc31f9c8c6fc81fbb56ec952bec18409f1 Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Mon, 20 Oct 2025 22:53:27 +0200 +Subject: selftests: mptcp: join: mark 'flush re-add' as skipped if not supported + +From: Matthieu Baerts (NGI0) + +commit d68460bc31f9c8c6fc81fbb56ec952bec18409f1 upstream. 
+ +The call to 'continue_if' was missing: it properly marks a subtest as +'skipped' if the attached condition is not valid. + +Without that, the test is wrongly marked as passed on older kernels. + +Fixes: e06959e9eebd ("selftests: mptcp: join: test for flush/re-add endpoints") +Cc: stable@vger.kernel.org +Reviewed-by: Geliang Tang +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20251020-net-mptcp-c-flag-late-add-addr-v1-2-8207030cb0e8@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh +@@ -3927,7 +3927,7 @@ endpoint_tests() + + # flush and re-add + if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT && +- mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then ++ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + # broadcast IP: no packet for this address will be received on ns1 diff --git a/queue-6.17/selftests-mptcp-join-mark-implicit-tests-as-skipped-if-not-supported.patch b/queue-6.17/selftests-mptcp-join-mark-implicit-tests-as-skipped-if-not-supported.patch new file mode 100644 index 0000000000..60a2c71290 --- /dev/null +++ b/queue-6.17/selftests-mptcp-join-mark-implicit-tests-as-skipped-if-not-supported.patch @@ -0,0 +1,45 @@ +From 973f80d715bd2504b4db6e049f292e694145cd79 Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Mon, 20 Oct 2025 22:53:28 +0200 +Subject: selftests: mptcp: join: mark implicit tests as skipped if not supported + +From: Matthieu Baerts (NGI0) + +commit 973f80d715bd2504b4db6e049f292e694145cd79 upstream. + +The call to 'continue_if' was missing: it properly marks a subtest as +'skipped' if the attached condition is not valid. + +Without that, the test is wrongly marked as passed on older kernels. 
+ +Fixes: 36c4127ae8dd ("selftests: mptcp: join: skip implicit tests if not supported") +Cc: stable@vger.kernel.org +Reviewed-by: Geliang Tang +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20251020-net-mptcp-c-flag-late-add-addr-v1-3-8207030cb0e8@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/mptcp/mptcp_join.sh | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh +@@ -3751,7 +3751,7 @@ endpoint_tests() + # subflow_rebuild_header is needed to support the implicit flag + # userspace pm type prevents add_addr + if reset "implicit EP" && +- mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then ++ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 2 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal +@@ -3776,7 +3776,7 @@ endpoint_tests() + fi + + if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && +- mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then ++ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + start_events + pm_nl_set_limits $ns1 0 3 + pm_nl_set_limits $ns2 0 3 diff --git a/queue-6.17/series b/queue-6.17/series index 22096c5823..c33e8bdaa6 100644 --- a/queue-6.17/series +++ b/queue-6.17/series @@ -59,3 +59,56 @@ net-datagram-introduce-datagram_poll_queue-for-custo.patch ovpn-use-datagram_poll_queue-for-socket-readiness-in.patch net-phy-micrel-always-set-shared-phydev-for-lan8814.patch net-mlx5-fix-ipsec-cleanup-over-mpv-device.patch +fs-notify-call-exportfs_encode_fid-with-s_umount.patch +net-bonding-fix-possible-peer-notify-event-loss-or-dup-issue.patch +hung_task-fix-warnings-caused-by-unaligned-lock-pointers.patch +mm-don-t-spin-in-add_stack_record-when-gfp-flags-don-t-allow.patch +dma-debug-don-t-report-false-positives-with-dma_bounce_unaligned_kmalloc.patch +virtio-net-zero-unused-hash-fields.patch +arch_topology-fix-incorrect-error-check-in-topology_parse_cpu_capacity.patch +riscv-hwprobe-fix-stale-vdso-data-for-late-initialized-keys-at-boot.patch +io_uring-sqpoll-switch-away-from-getrusage-for-cpu-accounting.patch +io_uring-sqpoll-be-smarter-on-when-to-update-the-stime-usage.patch +btrfs-directly-free-partially-initialized-fs_info-in-btrfs_check_leaked_roots.patch +btrfs-send-fix-duplicated-rmdir-operations-when-using-extrefs.patch +btrfs-ref-verify-fix-is_err-vs-null-check-in-btrfs_build_ref_tree.patch +gpio-pci-idio-16-define-maximum-valid-register-address-offset.patch +gpio-104-idio-16-define-maximum-valid-register-address-offset.patch +xfs-fix-locking-in-xchk_nlinks_collect_dir.patch +platform-x86-alienware-wmi-wmax-add-awcc-support-to-dell-g15-5530.patch +platform-x86-alienware-wmi-wmax-fix-null-pointer-dereference-in-sleep-handlers.patch +revert-cpuidle-menu-avoid-discarding-useful-information.patch +riscv-cpufeature-avoid-uninitialized-variable-in-has_thead_homogeneous_vlenb.patch +rust-device-fix-device-context-of-device-parent.patch +slab-avoid-race-on-slab-obj_exts-in-alloc_slab_obj_exts.patch +slab-fix-obj_ext-mistakenly-considered-null-due-to-race-condition.patch +smb-client-get-rid-of-d_drop-in-cifs_do_rename.patch +acpica-work-around-bogus-wstringop-overread-warning-since-gcc-11.patch +arm64-mte-do-not-warn-if-the-page-is-already-tagged-in-copy_highpage.patch +can-netlink-can_changelink-allow-disabling-of-automatic-restart.patch 
+cifs-fix-tcp_server_info-credits-to-be-signed.patch +devcoredump-fix-circular-locking-dependency-with-devcd-mutex.patch +hwmon-pmbus-max34440-update-adpm12160-coeff-due-to-latest-fw.patch +mips-malta-fix-keyboard-resource-preventing-i8042-driver-from-registering.patch +ocfs2-clear-extent-cache-after-moving-defragmenting-extents.patch +rv-fully-convert-enabled_monitors-to-use-list_head-as-iterator.patch +rv-make-rtapp-pagefault-monitor-depends-on-config_mmu.patch +vsock-fix-lock-inversion-in-vsock_assign_transport.patch +net-bonding-update-the-slave-array-for-broadcast-mode.patch +net-stmmac-dwmac-rk-fix-disabling-set_clock_selection.patch +net-usb-rtl8150-fix-frame-padding.patch +net-ravb-enforce-descriptor-type-ordering.patch +net-ravb-ensure-memory-write-completes-before-ringing-tx-doorbell.patch +mptcp-pm-in-kernel-c-flag-handle-late-add_addr.patch +selftests-mptcp-join-mark-flush-re-add-as-skipped-if-not-supported.patch +selftests-mptcp-join-mark-implicit-tests-as-skipped-if-not-supported.patch +selftests-mptcp-join-mark-delete-re-add-signal-as-skipped-if-not-supported.patch +mm-prevent-poison-consumption-when-splitting-thp.patch +mm-mremap-correctly-account-old-mapping-after-mremap_dontunmap-remap.patch +drm-xe-check-return-value-of-ggtt-workqueue-allocation.patch +drm-amd-display-increase-max-link-count-and-fix-link-enc-null-pointer-access.patch +mm-damon-core-use-damos_commit_quota_goal-for-new-goal-commit.patch +mm-damon-core-fix-list_add_tail-call-on-damon_call.patch +mm-damon-core-fix-potential-memory-leak-by-cleaning-ops_filter-in-damon_destroy_scheme.patch +mm-damon-sysfs-catch-commit-test-ctx-alloc-failure.patch +mm-damon-sysfs-dealloc-commit-test-ctx-always.patch diff --git a/queue-6.17/slab-avoid-race-on-slab-obj_exts-in-alloc_slab_obj_exts.patch b/queue-6.17/slab-avoid-race-on-slab-obj_exts-in-alloc_slab_obj_exts.patch new file mode 100644 index 0000000000..0d6b8e32c0 --- /dev/null +++ b/queue-6.17/slab-avoid-race-on-slab-obj_exts-in-alloc_slab_obj_exts.patch @@ -0,0 +1,72 @@ +From 6ed8bfd24ce1cb31742b09a3eb557cd008533eec Mon Sep 17 00:00:00 2001 +From: Hao Ge +Date: Tue, 21 Oct 2025 09:03:53 +0800 +Subject: slab: Avoid race on slab->obj_exts in alloc_slab_obj_exts + +From: Hao Ge + +commit 6ed8bfd24ce1cb31742b09a3eb557cd008533eec upstream. + +If two competing threads enter alloc_slab_obj_exts() and one of them +fails to allocate the object extension vector, it might override the +valid slab->obj_exts allocated by the other thread with +OBJEXTS_ALLOC_FAIL. This will cause the thread that lost this race and +expects a valid pointer to dereference a NULL pointer later on. + +Update slab->obj_exts atomically using cmpxchg() to avoid +slab->obj_exts overrides by racing threads. + +Thanks for Vlastimil and Suren's help with debugging. 
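+
+A user-space sketch of why the marker must go in via compare-and-swap
+(GCC/Clang __atomic builtins standing in for the kernel's cmpxchg();
+names hypothetical): the CAS publishes the failure sentinel only while
+the slot is still empty, so it can never wipe out a winner's pointer the
+way the old plain store could. A loser whose CAS fails can then check
+for, and keep, the winner's valid vector:
+
+	#include <pthread.h>
+	#include <stdint.h>
+	#include <stdio.h>
+	#include <stdlib.h>
+
+	#define ALLOC_FAIL ((uintptr_t)1)	/* like OBJEXTS_ALLOC_FAIL */
+
+	static uintptr_t obj_exts;	/* like slab->obj_exts, 0 = empty */
+
+	static int cas(uintptr_t *p, uintptr_t old, uintptr_t new)
+	{
+		return __atomic_compare_exchange_n(p, &old, new, 0,
+						   __ATOMIC_SEQ_CST,
+						   __ATOMIC_SEQ_CST);
+	}
+
+	static void *winner(void *arg)
+	{
+		uintptr_t vec = (uintptr_t)malloc(64);	/* "vector" alloc */
+
+		if (!cas(&obj_exts, 0, vec))
+			free((void *)vec);	/* somebody beat us to it */
+		return arg;
+	}
+
+	int main(void)
+	{
+		pthread_t t;
+		uintptr_t v;
+
+		pthread_create(&t, NULL, winner, NULL);
+		/* Loser path: mark failure only if the slot is still empty. */
+		if (!cas(&obj_exts, 0, ALLOC_FAIL)) {
+			v = __atomic_load_n(&obj_exts, __ATOMIC_SEQ_CST);
+			if (v && v != ALLOC_FAIL)
+				puts("lost the race to a valid vector: success");
+		}
+		pthread_join(t, NULL);
+		v = __atomic_load_n(&obj_exts, __ATOMIC_SEQ_CST);
+		if (v != ALLOC_FAIL)
+			free((void *)v);
+		return 0;
+	}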
+
+Fixes: f7381b911640 ("slab: mark slab->obj_exts allocation failures unconditionally")
+Cc: 
+Suggested-by: Suren Baghdasaryan
+Signed-off-by: Hao Ge
+Reviewed-by: Harry Yoo
+Reviewed-by: Suren Baghdasaryan
+Link: https://patch.msgid.link/20251021010353.1187193-1-hao.ge@linux.dev
+Signed-off-by: Vlastimil Babka
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/slub.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -1978,7 +1978,7 @@ static inline void mark_objexts_empty(st
+ 
+ static inline void mark_failed_objexts_alloc(struct slab *slab)
+ {
+-	slab->obj_exts = OBJEXTS_ALLOC_FAIL;
++	cmpxchg(&slab->obj_exts, 0, OBJEXTS_ALLOC_FAIL);
+ }
+ 
+ static inline void handle_failed_objexts_alloc(unsigned long obj_exts,
+@@ -2043,6 +2043,7 @@ int alloc_slab_obj_exts(struct slab *sla
+ #ifdef CONFIG_MEMCG
+ 	new_exts |= MEMCG_DATA_OBJEXTS;
+ #endif
++retry:
+ 	old_exts = READ_ONCE(slab->obj_exts);
+ 	handle_failed_objexts_alloc(old_exts, vec, objects);
+ 	if (new_slab) {
+@@ -2052,8 +2053,7 @@ int alloc_slab_obj_exts(struct slab *sla
+ 		 * be simply assigned.
+ 		 */
+ 		slab->obj_exts = new_exts;
+-	} else if ((old_exts & ~OBJEXTS_FLAGS_MASK) ||
+-		   cmpxchg(&slab->obj_exts, old_exts, new_exts) != old_exts) {
++	} else if (old_exts & ~OBJEXTS_FLAGS_MASK) {
+ 		/*
+ 		 * If the slab is already in use, somebody can allocate and
+ 		 * assign slabobj_exts in parallel. In this case the existing
+@@ -2062,6 +2062,9 @@ int alloc_slab_obj_exts(struct slab *sla
+ 		mark_objexts_empty(vec);
+ 		kfree(vec);
+ 		return 0;
++	} else if (cmpxchg(&slab->obj_exts, old_exts, new_exts) != old_exts) {
++		/* Retry if a racing thread changed slab->obj_exts from under us. */
++		goto retry;
+ 	}
+ 
+ 	kmemleak_not_leak(vec);
diff --git a/queue-6.17/slab-fix-obj_ext-mistakenly-considered-null-due-to-race-condition.patch b/queue-6.17/slab-fix-obj_ext-mistakenly-considered-null-due-to-race-condition.patch
new file mode 100644
index 0000000000..bd4f797ae1
--- /dev/null
+++ b/queue-6.17/slab-fix-obj_ext-mistakenly-considered-null-due-to-race-condition.patch
@@ -0,0 +1,73 @@
+From 7f434e1d9a17ca5f567c9796c9c105a65c18db9a Mon Sep 17 00:00:00 2001
+From: Hao Ge
+Date: Thu, 23 Oct 2025 22:33:13 +0800
+Subject: slab: Fix obj_ext mistakenly considered NULL due to race condition
+
+From: Hao Ge
+
+commit 7f434e1d9a17ca5f567c9796c9c105a65c18db9a upstream.
+
+If two competing threads enter alloc_slab_obj_exts(), and the one that
+allocates the vector wins the cmpxchg(), the other thread that failed
+allocation mistakenly assumes that slab->obj_exts is still empty due to
+its own allocation failure. This will then trigger warnings with
+CONFIG_MEM_ALLOC_PROFILING_DEBUG checks in the subsequent free path.
+
+Therefore, let's check the result of cmpxchg() to see if marking the
+allocation as failed was successful. If it wasn't, check whether the
+winning side succeeded with its allocation (it might also have been
+marking it as failed) and, if so, return success.
+ +Suggested-by: Harry Yoo +Fixes: f7381b911640 ("slab: mark slab->obj_exts allocation failures unconditionally") +Cc: +Signed-off-by: Hao Ge +Link: https://patch.msgid.link/20251023143313.1327968-1-hao.ge@linux.dev +Reviewed-by: Suren Baghdasaryan +Reviewed-by: Harry Yoo +Signed-off-by: Vlastimil Babka +Signed-off-by: Greg Kroah-Hartman +--- + mm/slub.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -1976,9 +1976,9 @@ static inline void mark_objexts_empty(st + } + } + +-static inline void mark_failed_objexts_alloc(struct slab *slab) ++static inline bool mark_failed_objexts_alloc(struct slab *slab) + { +- cmpxchg(&slab->obj_exts, 0, OBJEXTS_ALLOC_FAIL); ++ return cmpxchg(&slab->obj_exts, 0, OBJEXTS_ALLOC_FAIL) == 0; + } + + static inline void handle_failed_objexts_alloc(unsigned long obj_exts, +@@ -2000,7 +2000,7 @@ static inline void handle_failed_objexts + #else /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */ + + static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) {} +-static inline void mark_failed_objexts_alloc(struct slab *slab) {} ++static inline bool mark_failed_objexts_alloc(struct slab *slab) { return false; } + static inline void handle_failed_objexts_alloc(unsigned long obj_exts, + struct slabobj_ext *vec, unsigned int objects) {} + +@@ -2033,8 +2033,14 @@ int alloc_slab_obj_exts(struct slab *sla + vec = kcalloc_node(objects, sizeof(struct slabobj_ext), gfp, + slab_nid(slab)); + if (!vec) { +- /* Mark vectors which failed to allocate */ +- mark_failed_objexts_alloc(slab); ++ /* ++ * Try to mark vectors which failed to allocate. ++ * If this operation fails, there may be a racing process ++ * that has already completed the allocation. ++ */ ++ if (!mark_failed_objexts_alloc(slab) && ++ slab_obj_exts(slab)) ++ return 0; + + return -ENOMEM; + } diff --git a/queue-6.17/smb-client-get-rid-of-d_drop-in-cifs_do_rename.patch b/queue-6.17/smb-client-get-rid-of-d_drop-in-cifs_do_rename.patch new file mode 100644 index 0000000000..75f6b9b7c5 --- /dev/null +++ b/queue-6.17/smb-client-get-rid-of-d_drop-in-cifs_do_rename.patch @@ -0,0 +1,39 @@ +From 72ed55b4c335703c203b942972558173e1e5ddee Mon Sep 17 00:00:00 2001 +From: Paulo Alcantara +Date: Wed, 22 Oct 2025 21:11:01 -0300 +Subject: smb: client: get rid of d_drop() in cifs_do_rename() + +From: Paulo Alcantara + +commit 72ed55b4c335703c203b942972558173e1e5ddee upstream. + +There is no need to force a lookup by unhashing the moved dentry after +successfully renaming the file on server. The file metadata will be +re-fetched from server, if necessary, in the next call to +->d_revalidate() anyways. 
+Signed-off-by: Paulo Alcantara (Red Hat)
+Reviewed-by: David Howells
+Cc: stable@vger.kernel.org
+Cc: linux-cifs@vger.kernel.org
+Signed-off-by: Steve French
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/smb/client/inode.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/fs/smb/client/inode.c
++++ b/fs/smb/client/inode.c
+@@ -2484,11 +2484,8 @@ cifs_do_rename(const unsigned int xid, s
+ 	}
+ #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
+ do_rename_exit:
+-	if (rc == 0) {
++	if (rc == 0)
+ 		d_move(from_dentry, to_dentry);
+-		/* Force a new lookup */
+-		d_drop(from_dentry);
+-	}
+ 	cifs_put_tlink(tlink);
+ 	return rc;
+ }
diff --git a/queue-6.17/virtio-net-zero-unused-hash-fields.patch b/queue-6.17/virtio-net-zero-unused-hash-fields.patch
new file mode 100644
index 0000000000..019a7d63cf
--- /dev/null
+++ b/queue-6.17/virtio-net-zero-unused-hash-fields.patch
@@ -0,0 +1,44 @@
+From b2284768c6b32aa224ca7d0ef0741beb434f03aa Mon Sep 17 00:00:00 2001
+From: Jason Wang
+Date: Wed, 22 Oct 2025 11:44:21 +0800
+Subject: virtio-net: zero unused hash fields
+
+From: Jason Wang
+
+commit b2284768c6b32aa224ca7d0ef0741beb434f03aa upstream.
+
+When GSO tunnel is negotiated virtio_net_hdr_tnl_from_skb() tries to
+initialize the tunnel metadata but forgets to zero unused rxhash
+fields. This may leak information to the other side. Fix this by
+zeroing the unused hash fields.
+
+Acked-by: Michael S. Tsirkin
+Fixes: a2fb4bc4e2a6a ("net: implement virtio helpers to handle UDP GSO tunneling")
+Cc: 
+Signed-off-by: Jason Wang
+Reviewed-by: Xuan Zhuo
+Link: https://patch.msgid.link/20251022034421.70244-1-jasowang@redhat.com
+Signed-off-by: Jakub Kicinski
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/virtio_net.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
+index 20e0584db1dd..4d1780848d0e 100644
+--- a/include/linux/virtio_net.h
++++ b/include/linux/virtio_net.h
+@@ -401,6 +401,10 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
+ 	if (!tnl_hdr_negotiated)
+ 		return -EINVAL;
+ 
++	vhdr->hash_hdr.hash_value = 0;
++	vhdr->hash_hdr.hash_report = 0;
++	vhdr->hash_hdr.padding = 0;
++
+ 	/* Let the basic parsing deal with plain GSO features. */
+ 	skb_shinfo(skb)->gso_type &= ~tnl_gso_type;
+ 	ret = virtio_net_hdr_from_skb(skb, hdr, true, false, vlan_hlen);
+-- 
+2.51.1
+
diff --git a/queue-6.17/vsock-fix-lock-inversion-in-vsock_assign_transport.patch b/queue-6.17/vsock-fix-lock-inversion-in-vsock_assign_transport.patch
new file mode 100644
index 0000000000..dad289b401
--- /dev/null
+++ b/queue-6.17/vsock-fix-lock-inversion-in-vsock_assign_transport.patch
@@ -0,0 +1,95 @@
+From f7c877e7535260cc7a21484c994e8ce7e8cb6780 Mon Sep 17 00:00:00 2001
+From: Stefano Garzarella
+Date: Tue, 21 Oct 2025 14:17:18 +0200
+Subject: vsock: fix lock inversion in vsock_assign_transport()
+
+From: Stefano Garzarella
+
+commit f7c877e7535260cc7a21484c994e8ce7e8cb6780 upstream.
+
+Syzbot reported a potential lock inversion deadlock between
+vsock_register_mutex and sk_lock-AF_VSOCK when vsock_linger() is called.
+
+The issue was introduced by commit 687aa0c5581b ("vsock: Fix
+transport_* TOCTOU") which added vsock_register_mutex locking in
+vsock_assign_transport() around the transport->release() call, that can
+call vsock_linger(). vsock_assign_transport() can be called with sk_lock
+held. vsock_linger() calls sk_wait_event() that temporarily releases and
+re-acquires sk_lock.
During this window, if another thread holds
+vsock_register_mutex while trying to acquire sk_lock, a circular
+dependency is created.
+
+Fix this by releasing vsock_register_mutex before calling
+transport->release() and vsock_deassign_transport(). This is safe
+because we don't need to hold vsock_register_mutex while releasing the
+old transport, and we ensure the new transport won't disappear by
+obtaining a module reference first via try_module_get().
+
+Reported-by: syzbot+10e35716f8e4929681fa@syzkaller.appspotmail.com
+Tested-by: syzbot+10e35716f8e4929681fa@syzkaller.appspotmail.com
+Fixes: 687aa0c5581b ("vsock: Fix transport_* TOCTOU")
+Cc: mhal@rbox.co
+Cc: stable@vger.kernel.org
+Signed-off-by: Stefano Garzarella
+Link: https://patch.msgid.link/20251021121718.137668-1-sgarzare@redhat.com
+Signed-off-by: Paolo Abeni
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/vmw_vsock/af_vsock.c | 38 +++++++++++++++++++-------------------
+ 1 file changed, 19 insertions(+), 19 deletions(-)
+
+--- a/net/vmw_vsock/af_vsock.c
++++ b/net/vmw_vsock/af_vsock.c
+@@ -487,12 +487,26 @@ int vsock_assign_transport(struct vsock_
+ 		goto err;
+ 	}
+ 
+-	if (vsk->transport) {
+-		if (vsk->transport == new_transport) {
+-			ret = 0;
+-			goto err;
+-		}
++	if (vsk->transport && vsk->transport == new_transport) {
++		ret = 0;
++		goto err;
++	}
++
++	/* We increase the module refcnt to prevent the transport unloading
++	 * while there are open sockets assigned to it.
++	 */
++	if (!new_transport || !try_module_get(new_transport->module)) {
++		ret = -ENODEV;
++		goto err;
++	}
++
++	/* It's safe to release the mutex after a successful try_module_get().
++	 * Whichever transport `new_transport` points at, it won't go away until
++	 * the last module_put() below or in vsock_deassign_transport().
++	 */
++	mutex_unlock(&vsock_register_mutex);
+ 
++	if (vsk->transport) {
+ 		/* transport->release() must be called with sock lock acquired.
+ 		 * This path can only be taken during vsock_connect(), where we
+ 		 * have already held the sock lock. In the other cases, this
+@@ -512,20 +526,6 @@ int vsock_assign_transport(struct vsock_
+ 		vsk->peer_shutdown = 0;
+ 	}
+ 
+-	/* We increase the module refcnt to prevent the transport unloading
+-	 * while there are open sockets assigned to it.
+-	 */
+-	if (!new_transport || !try_module_get(new_transport->module)) {
+-		ret = -ENODEV;
+-		goto err;
+-	}
+-
+-	/* It's safe to release the mutex after a successful try_module_get().
+-	 * Whichever transport `new_transport` points at, it won't go away until
+-	 * the last module_put() below or in vsock_deassign_transport().
+-	 */
+-	mutex_unlock(&vsock_register_mutex);
+-
+ 	if (sk->sk_type == SOCK_SEQPACKET) {
+ 		if (!new_transport->seqpacket_allow ||
+ 		    !new_transport->seqpacket_allow(remote_cid)) {
diff --git a/queue-6.17/xfs-fix-locking-in-xchk_nlinks_collect_dir.patch b/queue-6.17/xfs-fix-locking-in-xchk_nlinks_collect_dir.patch
new file mode 100644
index 0000000000..2da405f7bc
--- /dev/null
+++ b/queue-6.17/xfs-fix-locking-in-xchk_nlinks_collect_dir.patch
@@ -0,0 +1,93 @@
+From f477af0cfa0487eddec66ffe10fd9df628ba6f52 Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong"
+Date: Tue, 21 Oct 2025 11:30:43 -0700
+Subject: xfs: fix locking in xchk_nlinks_collect_dir
+
+From: "Darrick J. Wong"
+
+commit f477af0cfa0487eddec66ffe10fd9df628ba6f52 upstream.
+
+On a filesystem with parent pointers, xchk_nlinks_collect_dir walks both
+the directory entries (data fork) and the parent pointers (attr fork) to
+determine the correct link count.
Unfortunately I forgot to update the +lock mode logic to handle the case of a directory whose attr fork is in +btree format and has not yet been loaded *and* whose data fork doesn't +need loading. + +This leads to a bunch of assertions from xfs/286 in xfs_iread_extents +because we only took ILOCK_SHARED, not ILOCK_EXCL. You'd need the rare +happenstance of a directory with a large number of non-pptr extended +attributes set and enough memory pressure to cause the directory to be +evicted and partially reloaded from disk. + +I /think/ this only started in 6.18-rc1 because I've started seeing OOM +errors with the maple tree slab using 70% of memory, and this didn't +happen in 6.17. Yay dynamic systems! + +Cc: stable@vger.kernel.org # v6.10 +Fixes: 77ede5f44b0d86 ("xfs: walk directory parent pointers to determine backref count") +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Carlos Maiolino +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/scrub/nlinks.c | 34 +++++++++++++++++++++++++++++++--- + 1 file changed, 31 insertions(+), 3 deletions(-) + +--- a/fs/xfs/scrub/nlinks.c ++++ b/fs/xfs/scrub/nlinks.c +@@ -376,6 +376,36 @@ out_incomplete: + return error; + } + ++static uint ++xchk_nlinks_ilock_dir( ++ struct xfs_inode *ip) ++{ ++ uint lock_mode = XFS_ILOCK_SHARED; ++ ++ /* ++ * We're going to scan the directory entries, so we must be ready to ++ * pull the data fork mappings into memory if they aren't already. ++ */ ++ if (xfs_need_iread_extents(&ip->i_df)) ++ lock_mode = XFS_ILOCK_EXCL; ++ ++ /* ++ * We're going to scan the parent pointers, so we must be ready to ++ * pull the attr fork mappings into memory if they aren't already. ++ */ ++ if (xfs_has_parent(ip->i_mount) && xfs_inode_has_attr_fork(ip) && ++ xfs_need_iread_extents(&ip->i_af)) ++ lock_mode = XFS_ILOCK_EXCL; ++ ++ /* ++ * Take the IOLOCK so that other threads cannot start a directory ++ * update while we're scanning. ++ */ ++ lock_mode |= XFS_IOLOCK_SHARED; ++ xfs_ilock(ip, lock_mode); ++ return lock_mode; ++} ++ + /* Walk a directory to bump the observed link counts of the children. */ + STATIC int + xchk_nlinks_collect_dir( +@@ -394,8 +424,7 @@ xchk_nlinks_collect_dir( + return 0; + + /* Prevent anyone from changing this directory while we walk it. */ +- xfs_ilock(dp, XFS_IOLOCK_SHARED); +- lock_mode = xfs_ilock_data_map_shared(dp); ++ lock_mode = xchk_nlinks_ilock_dir(dp); + + /* + * The dotdot entry of an unlinked directory still points to the last +@@ -452,7 +481,6 @@ out_abort: + xchk_iscan_abort(&xnc->collect_iscan); + out_unlock: + xfs_iunlock(dp, lock_mode); +- xfs_iunlock(dp, XFS_IOLOCK_SHARED); + return error; + } +