From f38de91dc87f0a71b773bb2c0a96a3cef96babf3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 21 Oct 2025 20:01:08 +0200 Subject: [PATCH] 6.12-stable patches added patches: d_alloc_parallel-set-dcache_par_lookup-earlier.patch hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch ipv4-adopt-dst_dev-skb_dst_dev-and-skb_dst_dev_net.patch ixgbevf-add-support-for-intel-r-e610-device.patch ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch md-fix-mssing-blktrace-bio-split-events.patch md-raid0-handle-bio_split-errors.patch md-raid1-handle-bio_split-errors.patch md-raid10-handle-bio_split-errors.patch mptcp-call-dst_release-in-mptcp_active_enable.patch mptcp-reset-blackhole-on-success-with-non-loopback-ifaces.patch mptcp-use-__sk_dst_get-and-dst_dev_rcu-in-mptcp_active_enable.patch net-add-locking-to-protect-skb-dev-access-in-ip_output.patch net-dst-add-four-helpers-to-annotate-data-races-around-dst-dev.patch nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch nfsd-drop-dprintk-in-blocklayout-xdr-functions.patch nfsd-fix-last-write-offset-handling-in-layoutcommit.patch nfsd-implement-large-extent-array-support-in-pnfs.patch nfsd-minor-cleanup-in-layoutcommit-processing.patch nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch nfsd-use-correct-error-code-when-decoding-extents.patch padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch pci-add-pci_vdevice_sub-helper-macro.patch phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch pm-runtime-add-new-devm-functions.patch tcp-cache-rtax_quickack-metric-in-a-hot-cache-line.patch tcp-convert-to-dev_net_rcu.patch vfs-don-t-leak-disconnected-dentries-on-umount.patch wifi-rtw89-avoid-possible-tx-wait-initialization-race.patch x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch x86-resctrl-refactor-resctrl_arch_rmid_read.patch xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch xfs-use-deferred-intent-items-for-reaping-crosslinked-blocks.patch --- ...rallel-set-dcache_par_lookup-earlier.patch | 70 ++++ ...of-bounds-read-in-hfsplus_strcasecmp.patch | 223 ++++++++++ ...ring-if-already-pm_runtime-suspended.patch | 66 +++ ...v_icm42600-simplify-pm_runtime-setup.patch | 86 ++++ ..._dev-skb_dst_dev-and-skb_dst_dev_net.patch | 384 ++++++++++++++++++ ...-add-support-for-intel-r-e610-device.patch | 171 ++++++++ ...ing-link-speed-data-for-e610-devices.patch | 306 ++++++++++++++ ...ty-by-negotiating-supported-features.patch | 327 +++++++++++++++ ...fix-mssing-blktrace-bio-split-events.patch | 142 +++++++ .../md-raid0-handle-bio_split-errors.patch | 55 +++ .../md-raid1-handle-bio_split-errors.patch | 110 +++++ .../md-raid10-handle-bio_split-errors.patch | 135 ++++++ ...l-dst_release-in-mptcp_active_enable.patch | 42 ++ ...-on-success-with-non-loopback-ifaces.patch | 52 +++ ...d-dst_dev_rcu-in-mptcp_active_enable.patch | 55 +++ ...-protect-skb-dev-access-in-ip_output.patch | 121 ++++++ ...o-annotate-data-races-around-dst-dev.patch | 135 ++++++ ...commit-for-the-flexfiles-layout-type.patch | 49 +++ 
...dprintk-in-blocklayout-xdr-functions.patch | 129 ++++++ ...rite-offset-handling-in-layoutcommit.patch | 113 ++++++ ...t-large-extent-array-support-in-pnfs.patch | 335 +++++++++++++++ ...r-cleanup-in-layoutcommit-processing.patch | 50 +++ ...oding-and-decoding-of-nfsd4_deviceid.patch | 156 +++++++ ...ect-error-code-when-decoding-extents.patch | 234 +++++++++++ ...u-when-reorder-sequence-wraps-around.patch | 44 ++ ...pci-add-pci_vdevice_sub-helper-macro.patch | 51 +++ ...fix-pll-lock-and-o_cmn_ready-polling.patch | 265 ++++++++++++ ...-wait-time-for-startup-state-machine.patch | 58 +++ ...dphy-store-hs_clk_rate-and-return-it.patch | 59 +++ .../pm-runtime-add-new-devm-functions.patch | 109 +++++ queue-6.12/series | 39 ++ ..._quickack-metric-in-a-hot-cache-line.patch | 81 ++++ queue-6.12/tcp-convert-to-dev_net_rcu.patch | 197 +++++++++ ...leak-disconnected-dentries-on-umount.patch | 57 +++ ...possible-tx-wait-initialization-race.patch | 230 +++++++++++ ...tivating-previously-unavailable-rmid.patch | 149 +++++++ ...ctrl-refactor-resctrl_arch_rmid_read.patch | 89 ++++ ...between-i386-and-other-architectures.patch | 172 ++++++++ ...crc-variable-in-xlog_recover_process.patch | 68 ++++ ...items-for-reaping-crosslinked-blocks.patch | 55 +++ 40 files changed, 5269 insertions(+) create mode 100644 queue-6.12/d_alloc_parallel-set-dcache_par_lookup-earlier.patch create mode 100644 queue-6.12/hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch create mode 100644 queue-6.12/iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch create mode 100644 queue-6.12/iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch create mode 100644 queue-6.12/ipv4-adopt-dst_dev-skb_dst_dev-and-skb_dst_dev_net.patch create mode 100644 queue-6.12/ixgbevf-add-support-for-intel-r-e610-device.patch create mode 100644 queue-6.12/ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch create mode 100644 queue-6.12/ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch create mode 100644 queue-6.12/md-fix-mssing-blktrace-bio-split-events.patch create mode 100644 queue-6.12/md-raid0-handle-bio_split-errors.patch create mode 100644 queue-6.12/md-raid1-handle-bio_split-errors.patch create mode 100644 queue-6.12/md-raid10-handle-bio_split-errors.patch create mode 100644 queue-6.12/mptcp-call-dst_release-in-mptcp_active_enable.patch create mode 100644 queue-6.12/mptcp-reset-blackhole-on-success-with-non-loopback-ifaces.patch create mode 100644 queue-6.12/mptcp-use-__sk_dst_get-and-dst_dev_rcu-in-mptcp_active_enable.patch create mode 100644 queue-6.12/net-add-locking-to-protect-skb-dev-access-in-ip_output.patch create mode 100644 queue-6.12/net-dst-add-four-helpers-to-annotate-data-races-around-dst-dev.patch create mode 100644 queue-6.12/nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch create mode 100644 queue-6.12/nfsd-drop-dprintk-in-blocklayout-xdr-functions.patch create mode 100644 queue-6.12/nfsd-fix-last-write-offset-handling-in-layoutcommit.patch create mode 100644 queue-6.12/nfsd-implement-large-extent-array-support-in-pnfs.patch create mode 100644 queue-6.12/nfsd-minor-cleanup-in-layoutcommit-processing.patch create mode 100644 queue-6.12/nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch create mode 100644 queue-6.12/nfsd-use-correct-error-code-when-decoding-extents.patch create mode 100644 queue-6.12/padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch create mode 100644 queue-6.12/pci-add-pci_vdevice_sub-helper-macro.patch create 
mode 100644 queue-6.12/phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch create mode 100644 queue-6.12/phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch create mode 100644 queue-6.12/phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch create mode 100644 queue-6.12/pm-runtime-add-new-devm-functions.patch create mode 100644 queue-6.12/tcp-cache-rtax_quickack-metric-in-a-hot-cache-line.patch create mode 100644 queue-6.12/tcp-convert-to-dev_net_rcu.patch create mode 100644 queue-6.12/vfs-don-t-leak-disconnected-dentries-on-umount.patch create mode 100644 queue-6.12/wifi-rtw89-avoid-possible-tx-wait-initialization-race.patch create mode 100644 queue-6.12/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch create mode 100644 queue-6.12/x86-resctrl-refactor-resctrl_arch_rmid_read.patch create mode 100644 queue-6.12/xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch create mode 100644 queue-6.12/xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch create mode 100644 queue-6.12/xfs-use-deferred-intent-items-for-reaping-crosslinked-blocks.patch diff --git a/queue-6.12/d_alloc_parallel-set-dcache_par_lookup-earlier.patch b/queue-6.12/d_alloc_parallel-set-dcache_par_lookup-earlier.patch new file mode 100644 index 0000000000..5b7cc71f9d --- /dev/null +++ b/queue-6.12/d_alloc_parallel-set-dcache_par_lookup-earlier.patch @@ -0,0 +1,70 @@ +From stable+bounces-188211-greg=kroah.com@vger.kernel.org Mon Oct 20 19:29:08 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 13:28:58 -0400 +Subject: d_alloc_parallel(): set DCACHE_PAR_LOOKUP earlier +To: stable@vger.kernel.org +Cc: Al Viro , Christian Brauner , Sasha Levin +Message-ID: <20251020172900.1851256-1-sashal@kernel.org> + +From: Al Viro + +[ Upstream commit e95db51c81f54dd12ea465b5127e4786f62a1095 ] + +Do that before new dentry is visible anywhere. It does create +a new possible state for dentries present in ->d_children/->d_sib - +DCACHE_PAR_LOOKUP present, negative, unhashed, not in in-lookup +hash chains, refcount positive. Those are going to be skipped +by all tree-walkers (both d_walk() callbacks in fs/dcache.c and +explicit loops over children/sibling lists elsewhere) and +dput() is fine with those. + +NOTE: dropping the final reference to a "normal" in-lookup dentry +(in in-lookup hash) is a bug - somebody must've forgotten to +call d_lookup_done() on it and bad things will happen. With those +it's OK; if/when we get around to making __dentry_kill() complain +about such breakage, remember that predicate to check should +*not* be just d_in_lookup(victim) but rather a combination of that +with !hlist_bl_unhashed(&victim->d_u.d_in_lookup_hash). Might +be worth considering later... 
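For illustration only (not part of the patch), the check the NOTE above
describes would be the inverse of a predicate like this; the helper name is
hypothetical, the fields are the ones used in fs/dcache.c:

static inline bool d_in_lookup_hashed(struct dentry *victim)
{
	/* DCACHE_PAR_LOOKUP set AND actually linked into the in-lookup
	 * hash - the only combination a future __dentry_kill() sanity
	 * check should complain about on the final dput()
	 */
	return d_in_lookup(victim) &&
	       !hlist_bl_unhashed(&victim->d_u.d_in_lookup_hash);
}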
+ +Reviewed-by: Christian Brauner +Signed-off-by: Al Viro +Stable-dep-of: 56094ad3eaa2 ("vfs: Don't leak disconnected dentries on umount") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/dcache.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -2475,13 +2475,19 @@ struct dentry *d_alloc_parallel(struct d + unsigned int hash = name->hash; + struct hlist_bl_head *b = in_lookup_hash(parent, hash); + struct hlist_bl_node *node; +- struct dentry *new = d_alloc(parent, name); ++ struct dentry *new = __d_alloc(parent->d_sb, name); + struct dentry *dentry; + unsigned seq, r_seq, d_seq; + + if (unlikely(!new)) + return ERR_PTR(-ENOMEM); + ++ new->d_flags |= DCACHE_PAR_LOOKUP; ++ spin_lock(&parent->d_lock); ++ new->d_parent = dget_dlock(parent); ++ hlist_add_head(&new->d_sib, &parent->d_children); ++ spin_unlock(&parent->d_lock); ++ + retry: + rcu_read_lock(); + seq = smp_load_acquire(&parent->d_inode->i_dir_seq); +@@ -2565,8 +2571,6 @@ retry: + return dentry; + } + rcu_read_unlock(); +- /* we can't take ->d_lock here; it's OK, though. */ +- new->d_flags |= DCACHE_PAR_LOOKUP; + new->d_wait = wq; + hlist_bl_add_head(&new->d_u.d_in_lookup_hash, b); + hlist_bl_unlock(b); diff --git a/queue-6.12/hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch b/queue-6.12/hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch new file mode 100644 index 0000000000..0660fa2a1e --- /dev/null +++ b/queue-6.12/hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch @@ -0,0 +1,223 @@ +From 42520df65bf67189541a425f7d36b0b3e7bd7844 Mon Sep 17 00:00:00 2001 +From: Viacheslav Dubeyko +Date: Fri, 19 Sep 2025 12:12:44 -0700 +Subject: hfsplus: fix slab-out-of-bounds read in hfsplus_strcasecmp() + +From: Viacheslav Dubeyko + +commit 42520df65bf67189541a425f7d36b0b3e7bd7844 upstream. + +The hfsplus_strcasecmp() logic can trigger the issue: + +[ 117.317703][ T9855] ================================================================== +[ 117.318353][ T9855] BUG: KASAN: slab-out-of-bounds in hfsplus_strcasecmp+0x1bc/0x490 +[ 117.318991][ T9855] Read of size 2 at addr ffff88802160f40c by task repro/9855 +[ 117.319577][ T9855] +[ 117.319773][ T9855] CPU: 0 UID: 0 PID: 9855 Comm: repro Not tainted 6.17.0-rc6 #33 PREEMPT(full) +[ 117.319780][ T9855] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 +[ 117.319783][ T9855] Call Trace: +[ 117.319785][ T9855] +[ 117.319788][ T9855] dump_stack_lvl+0x1c1/0x2a0 +[ 117.319795][ T9855] ? __virt_addr_valid+0x1c8/0x5c0 +[ 117.319803][ T9855] ? __pfx_dump_stack_lvl+0x10/0x10 +[ 117.319808][ T9855] ? rcu_is_watching+0x15/0xb0 +[ 117.319816][ T9855] ? lock_release+0x4b/0x3e0 +[ 117.319821][ T9855] ? __kasan_check_byte+0x12/0x40 +[ 117.319828][ T9855] ? __virt_addr_valid+0x1c8/0x5c0 +[ 117.319835][ T9855] ? __virt_addr_valid+0x4a5/0x5c0 +[ 117.319842][ T9855] print_report+0x17e/0x7e0 +[ 117.319848][ T9855] ? __virt_addr_valid+0x1c8/0x5c0 +[ 117.319855][ T9855] ? __virt_addr_valid+0x4a5/0x5c0 +[ 117.319862][ T9855] ? __phys_addr+0xd3/0x180 +[ 117.319869][ T9855] ? hfsplus_strcasecmp+0x1bc/0x490 +[ 117.319876][ T9855] kasan_report+0x147/0x180 +[ 117.319882][ T9855] ? hfsplus_strcasecmp+0x1bc/0x490 +[ 117.319891][ T9855] hfsplus_strcasecmp+0x1bc/0x490 +[ 117.319900][ T9855] ? __pfx_hfsplus_cat_case_cmp_key+0x10/0x10 +[ 117.319906][ T9855] hfs_find_rec_by_key+0xa9/0x1e0 +[ 117.319913][ T9855] __hfsplus_brec_find+0x18e/0x470 +[ 117.319920][ T9855] ? 
__pfx_hfsplus_bnode_find+0x10/0x10 +[ 117.319926][ T9855] ? __pfx_hfs_find_rec_by_key+0x10/0x10 +[ 117.319933][ T9855] ? __pfx___hfsplus_brec_find+0x10/0x10 +[ 117.319942][ T9855] hfsplus_brec_find+0x28f/0x510 +[ 117.319949][ T9855] ? __pfx_hfs_find_rec_by_key+0x10/0x10 +[ 117.319956][ T9855] ? __pfx_hfsplus_brec_find+0x10/0x10 +[ 117.319963][ T9855] ? __kmalloc_noprof+0x2a9/0x510 +[ 117.319969][ T9855] ? hfsplus_find_init+0x8c/0x1d0 +[ 117.319976][ T9855] hfsplus_brec_read+0x2b/0x120 +[ 117.319983][ T9855] hfsplus_lookup+0x2aa/0x890 +[ 117.319990][ T9855] ? __pfx_hfsplus_lookup+0x10/0x10 +[ 117.320003][ T9855] ? d_alloc_parallel+0x2f0/0x15e0 +[ 117.320008][ T9855] ? __lock_acquire+0xaec/0xd80 +[ 117.320013][ T9855] ? __pfx_d_alloc_parallel+0x10/0x10 +[ 117.320019][ T9855] ? __raw_spin_lock_init+0x45/0x100 +[ 117.320026][ T9855] ? __init_waitqueue_head+0xa9/0x150 +[ 117.320034][ T9855] __lookup_slow+0x297/0x3d0 +[ 117.320039][ T9855] ? __pfx___lookup_slow+0x10/0x10 +[ 117.320045][ T9855] ? down_read+0x1ad/0x2e0 +[ 117.320055][ T9855] lookup_slow+0x53/0x70 +[ 117.320065][ T9855] walk_component+0x2f0/0x430 +[ 117.320073][ T9855] path_lookupat+0x169/0x440 +[ 117.320081][ T9855] filename_lookup+0x212/0x590 +[ 117.320089][ T9855] ? __pfx_filename_lookup+0x10/0x10 +[ 117.320098][ T9855] ? strncpy_from_user+0x150/0x290 +[ 117.320105][ T9855] ? getname_flags+0x1e5/0x540 +[ 117.320112][ T9855] user_path_at+0x3a/0x60 +[ 117.320117][ T9855] __x64_sys_umount+0xee/0x160 +[ 117.320123][ T9855] ? __pfx___x64_sys_umount+0x10/0x10 +[ 117.320129][ T9855] ? do_syscall_64+0xb7/0x3a0 +[ 117.320135][ T9855] ? entry_SYSCALL_64_after_hwframe+0x77/0x7f +[ 117.320141][ T9855] ? entry_SYSCALL_64_after_hwframe+0x77/0x7f +[ 117.320145][ T9855] do_syscall_64+0xf3/0x3a0 +[ 117.320150][ T9855] ? 
exc_page_fault+0x9f/0xf0 +[ 117.320154][ T9855] entry_SYSCALL_64_after_hwframe+0x77/0x7f +[ 117.320158][ T9855] RIP: 0033:0x7f7dd7908b07 +[ 117.320163][ T9855] Code: 23 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 08 +[ 117.320167][ T9855] RSP: 002b:00007ffd5ebd9698 EFLAGS: 00000202 ORIG_RAX: 00000000000000a6 +[ 117.320172][ T9855] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f7dd7908b07 +[ 117.320176][ T9855] RDX: 0000000000000009 RSI: 0000000000000009 RDI: 00007ffd5ebd9740 +[ 117.320179][ T9855] RBP: 00007ffd5ebda780 R08: 0000000000000005 R09: 00007ffd5ebd9530 +[ 117.320181][ T9855] R10: 00007f7dd799bfc0 R11: 0000000000000202 R12: 000055e2008b32d0 +[ 117.320184][ T9855] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 +[ 117.320189][ T9855] +[ 117.320190][ T9855] +[ 117.351311][ T9855] Allocated by task 9855: +[ 117.351683][ T9855] kasan_save_track+0x3e/0x80 +[ 117.352093][ T9855] __kasan_kmalloc+0x8d/0xa0 +[ 117.352490][ T9855] __kmalloc_noprof+0x288/0x510 +[ 117.352914][ T9855] hfsplus_find_init+0x8c/0x1d0 +[ 117.353342][ T9855] hfsplus_lookup+0x19c/0x890 +[ 117.353747][ T9855] __lookup_slow+0x297/0x3d0 +[ 117.354148][ T9855] lookup_slow+0x53/0x70 +[ 117.354514][ T9855] walk_component+0x2f0/0x430 +[ 117.354921][ T9855] path_lookupat+0x169/0x440 +[ 117.355325][ T9855] filename_lookup+0x212/0x590 +[ 117.355740][ T9855] user_path_at+0x3a/0x60 +[ 117.356115][ T9855] __x64_sys_umount+0xee/0x160 +[ 117.356529][ T9855] do_syscall_64+0xf3/0x3a0 +[ 117.356920][ T9855] entry_SYSCALL_64_after_hwframe+0x77/0x7f +[ 117.357429][ T9855] +[ 117.357636][ T9855] The buggy address belongs to the object at ffff88802160f000 +[ 117.357636][ T9855] which belongs to the cache kmalloc-2k of size 2048 +[ 117.358827][ T9855] The buggy address is located 0 bytes to the right of +[ 117.358827][ T9855] allocated 1036-byte region [ffff88802160f000, ffff88802160f40c) +[ 117.360061][ T9855] +[ 117.360266][ T9855] The buggy address belongs to the physical page: +[ 117.360813][ T9855] page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x21608 +[ 117.361562][ T9855] head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 +[ 117.362285][ T9855] flags: 0xfff00000000040(head|node=0|zone=1|lastcpupid=0x7ff) +[ 117.362929][ T9855] page_type: f5(slab) +[ 117.363282][ T9855] raw: 00fff00000000040 ffff88801a842f00 ffffea0000932000 dead000000000002 +[ 117.364015][ T9855] raw: 0000000000000000 0000000080080008 00000000f5000000 0000000000000000 +[ 117.364750][ T9855] head: 00fff00000000040 ffff88801a842f00 ffffea0000932000 dead000000000002 +[ 117.365491][ T9855] head: 0000000000000000 0000000080080008 00000000f5000000 0000000000000000 +[ 117.366232][ T9855] head: 00fff00000000003 ffffea0000858201 00000000ffffffff 00000000ffffffff +[ 117.366968][ T9855] head: ffffffffffffffff 0000000000000000 00000000ffffffff 0000000000000008 +[ 117.367711][ T9855] page dumped because: kasan: bad access detected +[ 117.368259][ T9855] page_owner tracks the page as allocated +[ 117.368745][ T9855] page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN1 +[ 117.370541][ T9855] post_alloc_hook+0x240/0x2a0 +[ 117.370954][ T9855] get_page_from_freelist+0x2101/0x21e0 +[ 117.371435][ T9855] __alloc_frozen_pages_noprof+0x274/0x380 +[ 117.371935][ T9855] alloc_pages_mpol+0x241/0x4b0 +[ 117.372360][ T9855] allocate_slab+0x8d/0x380 +[ 117.372752][ T9855] ___slab_alloc+0xbe3/0x1400 +[ 117.373159][ T9855] 
__kmalloc_cache_noprof+0x296/0x3d0 +[ 117.373621][ T9855] nexthop_net_init+0x75/0x100 +[ 117.374038][ T9855] ops_init+0x35c/0x5c0 +[ 117.374400][ T9855] setup_net+0x10c/0x320 +[ 117.374768][ T9855] copy_net_ns+0x31b/0x4d0 +[ 117.375156][ T9855] create_new_namespaces+0x3f3/0x720 +[ 117.375613][ T9855] unshare_nsproxy_namespaces+0x11c/0x170 +[ 117.376094][ T9855] ksys_unshare+0x4ca/0x8d0 +[ 117.376477][ T9855] __x64_sys_unshare+0x38/0x50 +[ 117.376879][ T9855] do_syscall_64+0xf3/0x3a0 +[ 117.377265][ T9855] page last free pid 9110 tgid 9110 stack trace: +[ 117.377795][ T9855] __free_frozen_pages+0xbeb/0xd50 +[ 117.378229][ T9855] __put_partials+0x152/0x1a0 +[ 117.378625][ T9855] put_cpu_partial+0x17c/0x250 +[ 117.379026][ T9855] __slab_free+0x2d4/0x3c0 +[ 117.379404][ T9855] qlist_free_all+0x97/0x140 +[ 117.379790][ T9855] kasan_quarantine_reduce+0x148/0x160 +[ 117.380250][ T9855] __kasan_slab_alloc+0x22/0x80 +[ 117.380662][ T9855] __kmalloc_noprof+0x232/0x510 +[ 117.381074][ T9855] tomoyo_supervisor+0xc0a/0x1360 +[ 117.381498][ T9855] tomoyo_env_perm+0x149/0x1e0 +[ 117.381903][ T9855] tomoyo_find_next_domain+0x15ad/0x1b90 +[ 117.382378][ T9855] tomoyo_bprm_check_security+0x11c/0x180 +[ 117.382859][ T9855] security_bprm_check+0x89/0x280 +[ 117.383289][ T9855] bprm_execve+0x8f1/0x14a0 +[ 117.383673][ T9855] do_execveat_common+0x528/0x6b0 +[ 117.384103][ T9855] __x64_sys_execve+0x94/0xb0 +[ 117.384500][ T9855] +[ 117.384706][ T9855] Memory state around the buggy address: +[ 117.385179][ T9855] ffff88802160f300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 117.385854][ T9855] ffff88802160f380: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 117.386534][ T9855] >ffff88802160f400: 00 04 fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 117.387204][ T9855] ^ +[ 117.387566][ T9855] ffff88802160f480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 117.388243][ T9855] ffff88802160f500: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 117.388918][ T9855] ================================================================== + +The issue takes place if the length field of struct hfsplus_unistr +is bigger than HFSPLUS_MAX_STRLEN. The patch simply checks +the length of comparing strings. And if the strings' length +is bigger than HFSPLUS_MAX_STRLEN, then it is corrected +to this value. + +v2 +The string length correction has been added for hfsplus_strcmp(). 
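For illustration only (not part of the patch), the clamping pattern the fix
open-codes in both hfsplus_strcasecmp() and hfsplus_strcmp() could be
factored as a helper; the function name here is hypothetical:

static unsigned int hfsplus_clamped_strlen(const struct hfsplus_unistr *s)
{
	unsigned int len = be16_to_cpu(s->length);

	/* the on-disk length field is untrusted; cap it so the walk
	 * over s->unicode[] cannot read past the allocation
	 */
	if (len > HFSPLUS_MAX_STRLEN) {
		pr_err("invalid length %u has been corrected to %d\n",
		       len, HFSPLUS_MAX_STRLEN);
		len = HFSPLUS_MAX_STRLEN;
	}
	return len;
}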
+ +Reported-by: Jiaming Zhang +Signed-off-by: Viacheslav Dubeyko +cc: John Paul Adrian Glaubitz +cc: Yangtao Li +cc: linux-fsdevel@vger.kernel.org +cc: syzkaller@googlegroups.com +Link: https://lore.kernel.org/r/20250919191243.1370388-1-slava@dubeyko.com +Signed-off-by: Viacheslav Dubeyko +Signed-off-by: Greg Kroah-Hartman +--- + fs/hfsplus/unicode.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +--- a/fs/hfsplus/unicode.c ++++ b/fs/hfsplus/unicode.c +@@ -40,6 +40,18 @@ int hfsplus_strcasecmp(const struct hfsp + p1 = s1->unicode; + p2 = s2->unicode; + ++ if (len1 > HFSPLUS_MAX_STRLEN) { ++ len1 = HFSPLUS_MAX_STRLEN; ++ pr_err("invalid length %u has been corrected to %d\n", ++ be16_to_cpu(s1->length), len1); ++ } ++ ++ if (len2 > HFSPLUS_MAX_STRLEN) { ++ len2 = HFSPLUS_MAX_STRLEN; ++ pr_err("invalid length %u has been corrected to %d\n", ++ be16_to_cpu(s2->length), len2); ++ } ++ + while (1) { + c1 = c2 = 0; + +@@ -74,6 +86,18 @@ int hfsplus_strcmp(const struct hfsplus_ + p1 = s1->unicode; + p2 = s2->unicode; + ++ if (len1 > HFSPLUS_MAX_STRLEN) { ++ len1 = HFSPLUS_MAX_STRLEN; ++ pr_err("invalid length %u has been corrected to %d\n", ++ be16_to_cpu(s1->length), len1); ++ } ++ ++ if (len2 > HFSPLUS_MAX_STRLEN) { ++ len2 = HFSPLUS_MAX_STRLEN; ++ pr_err("invalid length %u has been corrected to %d\n", ++ be16_to_cpu(s2->length), len2); ++ } ++ + for (len = min(len1, len2); len > 0; len--) { + c1 = be16_to_cpu(*p1); + c2 = be16_to_cpu(*p2); diff --git a/queue-6.12/iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch b/queue-6.12/iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch new file mode 100644 index 0000000000..b401a5da12 --- /dev/null +++ b/queue-6.12/iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch @@ -0,0 +1,66 @@ +From stable+bounces-188108-greg=kroah.com@vger.kernel.org Mon Oct 20 15:08:22 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 09:08:10 -0400 +Subject: iio: imu: inv_icm42600: Avoid configuring if already pm_runtime suspended +To: stable@vger.kernel.org +Cc: Sean Nyekjaer , Stable@vger.kernel.org, Jonathan Cameron , Sasha Levin +Message-ID: <20251020130810.1766634-1-sashal@kernel.org> + +From: Sean Nyekjaer + +[ Upstream commit 466f7a2fef2a4e426f809f79845a1ec1aeb558f4 ] + +Do as in suspend, skip resume configuration steps if the device is already +pm_runtime suspended. This avoids reconfiguring a device that is already +in the correct low-power state and ensures that pm_runtime handles the +power state transitions properly. 
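For illustration only (not part of the patch), the shape of the fix: system
resume now mirrors the early return that suspend already performs when
runtime PM holds the device suspended. The function name is hypothetical:

static int example_system_resume(struct device *dev)
{
	/* already runtime-suspended: the device sits in the correct
	 * low-power state, so skip reconfiguration and let runtime PM
	 * handle the power-state transition on next use
	 */
	if (pm_runtime_suspended(dev))
		return 0;

	/* ... enable vddio, restore gyro/accel/temp configuration ... */
	return 0;
}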
+ +Fixes: 31c24c1e93c3 ("iio: imu: inv_icm42600: add core of new inv_icm42600 driver") +Signed-off-by: Sean Nyekjaer +Link: https://patch.msgid.link/20250901-icm42pmreg-v3-3-ef1336246960@geanix.com +Cc: +Signed-off-by: Jonathan Cameron +[ removed apex/wakeup variable declarations ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/imu/inv_icm42600/inv_icm42600_core.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c ++++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c +@@ -787,17 +787,15 @@ EXPORT_SYMBOL_NS_GPL(inv_icm42600_core_p + static int inv_icm42600_suspend(struct device *dev) + { + struct inv_icm42600_state *st = dev_get_drvdata(dev); +- int ret; ++ int ret = 0; + + mutex_lock(&st->lock); + + st->suspended.gyro = st->conf.gyro.mode; + st->suspended.accel = st->conf.accel.mode; + st->suspended.temp = st->conf.temp_en; +- if (pm_runtime_suspended(dev)) { +- ret = 0; ++ if (pm_runtime_suspended(dev)) + goto out_unlock; +- } + + /* disable FIFO data streaming */ + if (st->fifo.on) { +@@ -829,10 +827,13 @@ static int inv_icm42600_resume(struct de + struct inv_icm42600_state *st = dev_get_drvdata(dev); + struct inv_icm42600_sensor_state *gyro_st = iio_priv(st->indio_gyro); + struct inv_icm42600_sensor_state *accel_st = iio_priv(st->indio_accel); +- int ret; ++ int ret = 0; + + mutex_lock(&st->lock); + ++ if (pm_runtime_suspended(dev)) ++ goto out_unlock; ++ + ret = inv_icm42600_enable_regulator_vddio(st); + if (ret) + goto out_unlock; diff --git a/queue-6.12/iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch b/queue-6.12/iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch new file mode 100644 index 0000000000..b3e27b6cf9 --- /dev/null +++ b/queue-6.12/iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch @@ -0,0 +1,86 @@ +From stable+bounces-188095-greg=kroah.com@vger.kernel.org Mon Oct 20 15:06:21 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 09:02:39 -0400 +Subject: iio: imu: inv_icm42600: Simplify pm_runtime setup +To: stable@vger.kernel.org +Cc: Sean Nyekjaer , Stable@vger.kernel.org, Jonathan Cameron , Sasha Levin +Message-ID: <20251020130239.1763909-2-sashal@kernel.org> + +From: Sean Nyekjaer + +[ Upstream commit 0792c1984a45ccd7a296d6b8cb78088bc99a212e ] + +Rework the power management in inv_icm42600_core_probe() to use +devm_pm_runtime_set_active_enabled(), which simplifies the runtime PM +setup by handling activation and enabling in one step. +Remove the separate inv_icm42600_disable_pm callback, as it's no longer +needed with the devm-managed approach. +Using devm_pm_runtime_enable() also fixes the missing disable of +autosuspend. +Update inv_icm42600_disable_vddio_reg() to only disable the regulator if +the device is not suspended i.e. powered-down, preventing unbalanced +disables. +Also remove redundant error msg on regulator_disable(), the regulator +framework already emits an error message when regulator_disable() fails. + +This simplifies the PM setup and avoids manipulating the usage counter +unnecessarily. 
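For illustration only (not part of the patch), the devm-based setup the fix
switches to; runtime PM disable (including the previously missing
autosuspend disable) is then undone automatically on driver removal. The
function name is hypothetical, the delay constant is the driver's own:

static int example_setup_runtime_pm(struct device *dev)
{
	int ret;

	/* marks the device active and enables runtime PM in one step,
	 * registering a devm action that reverses both on detach
	 */
	ret = devm_pm_runtime_set_active_enabled(dev);
	if (ret)
		return ret;

	pm_runtime_set_autosuspend_delay(dev, INV_ICM42600_SUSPEND_DELAY_MS);
	pm_runtime_use_autosuspend(dev);

	return 0;
}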
+ +Fixes: 31c24c1e93c3 ("iio: imu: inv_icm42600: add core of new inv_icm42600 driver") +Signed-off-by: Sean Nyekjaer +Link: https://patch.msgid.link/20250901-icm42pmreg-v3-1-ef1336246960@geanix.com +Cc: +Signed-off-by: Jonathan Cameron +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/imu/inv_icm42600/inv_icm42600_core.c | 24 ++++++----------------- + 1 file changed, 7 insertions(+), 17 deletions(-) + +--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c ++++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c +@@ -667,20 +667,12 @@ static void inv_icm42600_disable_vdd_reg + static void inv_icm42600_disable_vddio_reg(void *_data) + { + struct inv_icm42600_state *st = _data; +- const struct device *dev = regmap_get_device(st->map); +- int ret; +- +- ret = regulator_disable(st->vddio_supply); +- if (ret) +- dev_err(dev, "failed to disable vddio error %d\n", ret); +-} ++ struct device *dev = regmap_get_device(st->map); + +-static void inv_icm42600_disable_pm(void *_data) +-{ +- struct device *dev = _data; ++ if (pm_runtime_status_suspended(dev)) ++ return; + +- pm_runtime_put_sync(dev); +- pm_runtime_disable(dev); ++ regulator_disable(st->vddio_supply); + } + + int inv_icm42600_core_probe(struct regmap *regmap, int chip, int irq, +@@ -777,16 +769,14 @@ int inv_icm42600_core_probe(struct regma + return ret; + + /* setup runtime power management */ +- ret = pm_runtime_set_active(dev); ++ ret = devm_pm_runtime_set_active_enabled(dev); + if (ret) + return ret; +- pm_runtime_get_noresume(dev); +- pm_runtime_enable(dev); ++ + pm_runtime_set_autosuspend_delay(dev, INV_ICM42600_SUSPEND_DELAY_MS); + pm_runtime_use_autosuspend(dev); +- pm_runtime_put(dev); + +- return devm_add_action_or_reset(dev, inv_icm42600_disable_pm, dev); ++ return ret; + } + EXPORT_SYMBOL_NS_GPL(inv_icm42600_core_probe, IIO_ICM42600); + diff --git a/queue-6.12/ipv4-adopt-dst_dev-skb_dst_dev-and-skb_dst_dev_net.patch b/queue-6.12/ipv4-adopt-dst_dev-skb_dst_dev-and-skb_dst_dev_net.patch new file mode 100644 index 0000000000..a3c66fd47b --- /dev/null +++ b/queue-6.12/ipv4-adopt-dst_dev-skb_dst_dev-and-skb_dst_dev_net.patch @@ -0,0 +1,384 @@ +From stable+bounces-188150-greg=kroah.com@vger.kernel.org Mon Oct 20 17:47:07 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 11:44:05 -0400 +Subject: ipv4: adopt dst_dev, skb_dst_dev and skb_dst_dev_net[_rcu] +To: stable@vger.kernel.org +Cc: Eric Dumazet , Kuniyuki Iwashima , Jakub Kicinski , Sasha Levin +Message-ID: <20251020154409.1823664-4-sashal@kernel.org> + +From: Eric Dumazet + +[ Upstream commit a74fc62eec155ca5a6da8ff3856f3dc87fe24558 ] + +Use the new helpers as a first step to deal with +potential dst->dev races. 
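The helpers adopted below are introduced by the companion patch
net-dst-add-four-helpers-to-annotate-data-races-around-dst-dev.patch in
this queue and are not shown in this file; their expected shape is a
READ_ONCE() wrapper (sketch, for reference only):

static inline struct net_device *dst_dev(const struct dst_entry *dst)
{
	return READ_ONCE(dst->dev);	/* annotate racy dst->dev reads */
}

static inline struct net_device *skb_dst_dev(const struct sk_buff *skb)
{
	return dst_dev(skb_dst(skb));
}

static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb)
{
	return dev_net_rcu(skb_dst_dev(skb));
}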
+ +Signed-off-by: Eric Dumazet +Reviewed-by: Kuniyuki Iwashima +Link: https://patch.msgid.link/20250630121934.3399505-8-edumazet@google.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 833d4313bc1e ("mptcp: reset blackhole on success with non-loopback ifaces") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_hashtables.h | 2 +- + include/net/ip.h | 11 ++++++----- + include/net/route.h | 2 +- + net/ipv4/icmp.c | 24 +++++++++++++----------- + net/ipv4/igmp.c | 2 +- + net/ipv4/ip_fragment.c | 2 +- + net/ipv4/ip_output.c | 6 +++--- + net/ipv4/ip_vti.c | 4 ++-- + net/ipv4/netfilter.c | 4 ++-- + net/ipv4/route.c | 8 ++++---- + net/ipv4/tcp_fastopen.c | 4 +++- + net/ipv4/tcp_ipv4.c | 2 +- + net/ipv4/tcp_metrics.c | 8 ++++---- + net/ipv4/xfrm4_output.c | 2 +- + 14 files changed, 43 insertions(+), 38 deletions(-) + +--- a/include/net/inet_hashtables.h ++++ b/include/net/inet_hashtables.h +@@ -492,7 +492,7 @@ static inline struct sock *__inet_lookup + const int sdif, + bool *refcounted) + { +- struct net *net = dev_net_rcu(skb_dst(skb)->dev); ++ struct net *net = skb_dst_dev_net_rcu(skb); + const struct iphdr *iph = ip_hdr(skb); + struct sock *sk; + +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -475,7 +475,7 @@ static inline unsigned int ip_dst_mtu_ma + + rcu_read_lock(); + +- net = dev_net_rcu(dst->dev); ++ net = dev_net_rcu(dst_dev(dst)); + if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || + ip_mtu_locked(dst) || + !forwarding) { +@@ -489,7 +489,7 @@ static inline unsigned int ip_dst_mtu_ma + if (mtu) + goto out; + +- mtu = READ_ONCE(dst->dev->mtu); ++ mtu = READ_ONCE(dst_dev(dst)->mtu); + + if (unlikely(ip_mtu_locked(dst))) { + if (rt->rt_uses_gateway && mtu > 576) +@@ -509,16 +509,17 @@ out: + static inline unsigned int ip_skb_dst_mtu(struct sock *sk, + const struct sk_buff *skb) + { ++ const struct dst_entry *dst = skb_dst(skb); + unsigned int mtu; + + if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) { + bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; + +- return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding); ++ return ip_dst_mtu_maybe_forward(dst, forwarding); + } + +- mtu = min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); +- return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); ++ mtu = min(READ_ONCE(dst_dev(dst)->mtu), IP_MAX_MTU); ++ return mtu - lwtunnel_headroom(dst->lwtstate, mtu); + } + + struct dst_metrics *ip_fib_metrics_init(struct nlattr *fc_mx, int fc_mx_len, +--- a/include/net/route.h ++++ b/include/net/route.h +@@ -369,7 +369,7 @@ static inline int ip4_dst_hoplimit(const + const struct net *net; + + rcu_read_lock(); +- net = dev_net_rcu(dst->dev); ++ net = dev_net_rcu(dst_dev(dst)); + hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); + rcu_read_unlock(); + } +--- a/net/ipv4/icmp.c ++++ b/net/ipv4/icmp.c +@@ -311,18 +311,20 @@ static bool icmpv4_xrlim_allow(struct ne + { + struct dst_entry *dst = &rt->dst; + struct inet_peer *peer; ++ struct net_device *dev; + bool rc = true; + + if (!apply_ratelimit) + return true; + + /* No rate limit on loopback */ +- if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) ++ dev = dst_dev(dst); ++ if (dev && (dev->flags & IFF_LOOPBACK)) + goto out; + + rcu_read_lock(); + peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, +- l3mdev_master_ifindex_rcu(dst->dev)); ++ l3mdev_master_ifindex_rcu(dev)); + rc = inet_peer_xrlim_allow(peer, + READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); + rcu_read_unlock(); +@@ -468,13 +470,13 @@ out_bh_enable: + */ + static struct net_device 
*icmp_get_route_lookup_dev(struct sk_buff *skb) + { +- struct net_device *route_lookup_dev = NULL; ++ struct net_device *dev = skb->dev; ++ const struct dst_entry *dst; + +- if (skb->dev) +- route_lookup_dev = skb->dev; +- else if (skb_dst(skb)) +- route_lookup_dev = skb_dst(skb)->dev; +- return route_lookup_dev; ++ if (dev) ++ return dev; ++ dst = skb_dst(skb); ++ return dst ? dst_dev(dst) : NULL; + } + + static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, +@@ -873,7 +875,7 @@ static enum skb_drop_reason icmp_unreach + struct net *net; + u32 info = 0; + +- net = dev_net_rcu(skb_dst(skb)->dev); ++ net = skb_dst_dev_net_rcu(skb); + + /* + * Incomplete header ? +@@ -1016,7 +1018,7 @@ static enum skb_drop_reason icmp_echo(st + struct icmp_bxm icmp_param; + struct net *net; + +- net = dev_net_rcu(skb_dst(skb)->dev); ++ net = skb_dst_dev_net_rcu(skb); + /* should there be an ICMP stat for ignored echos? */ + if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all)) + return SKB_NOT_DROPPED_YET; +@@ -1186,7 +1188,7 @@ static enum skb_drop_reason icmp_timesta + return SKB_NOT_DROPPED_YET; + + out_err: +- __ICMP_INC_STATS(dev_net_rcu(skb_dst(skb)->dev), ICMP_MIB_INERRORS); ++ __ICMP_INC_STATS(skb_dst_dev_net_rcu(skb), ICMP_MIB_INERRORS); + return SKB_DROP_REASON_PKT_TOO_SMALL; + } + +--- a/net/ipv4/igmp.c ++++ b/net/ipv4/igmp.c +@@ -424,7 +424,7 @@ static int igmpv3_sendpack(struct sk_buf + + pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen); + +- return ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); ++ return ip_local_out(skb_dst_dev_net(skb), skb->sk, skb); + } + + static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel) +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -488,7 +488,7 @@ out_fail: + /* Process an incoming IP datagram fragment. */ + int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) + { +- struct net_device *dev = skb->dev ? : skb_dst(skb)->dev; ++ struct net_device *dev = skb->dev ? 
: skb_dst_dev(skb); + int vif = l3mdev_master_ifindex_rcu(dev); + struct ipq *qp; + +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -117,7 +117,7 @@ int __ip_local_out(struct net *net, stru + skb->protocol = htons(ETH_P_IP); + + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, +- net, sk, skb, NULL, skb_dst(skb)->dev, ++ net, sk, skb, NULL, skb_dst_dev(skb), + dst_output); + } + +@@ -200,7 +200,7 @@ static int ip_finish_output2(struct net + { + struct dst_entry *dst = skb_dst(skb); + struct rtable *rt = dst_rtable(dst); +- struct net_device *dev = dst->dev; ++ struct net_device *dev = dst_dev(dst); + unsigned int hh_len = LL_RESERVED_SPACE(dev); + struct neighbour *neigh; + bool is_v6gw = false; +@@ -426,7 +426,7 @@ int ip_mc_output(struct net *net, struct + + int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) + { +- struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; ++ struct net_device *dev = skb_dst_dev(skb), *indev = skb->dev; + + skb->dev = dev; + skb->protocol = htons(ETH_P_IP); +--- a/net/ipv4/ip_vti.c ++++ b/net/ipv4/ip_vti.c +@@ -229,7 +229,7 @@ static netdev_tx_t vti_xmit(struct sk_bu + goto tx_error_icmp; + } + +- tdev = dst->dev; ++ tdev = dst_dev(dst); + + if (tdev == dev) { + dst_release(dst); +@@ -259,7 +259,7 @@ static netdev_tx_t vti_xmit(struct sk_bu + xmit: + skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); + skb_dst_set(skb, dst); +- skb->dev = skb_dst(skb)->dev; ++ skb->dev = skb_dst_dev(skb); + + err = dst_output(tunnel->net, skb->sk, skb); + if (net_xmit_eval(err) == 0) +--- a/net/ipv4/netfilter.c ++++ b/net/ipv4/netfilter.c +@@ -20,12 +20,12 @@ + /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ + int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type) + { ++ struct net_device *dev = skb_dst_dev(skb); + const struct iphdr *iph = ip_hdr(skb); + struct rtable *rt; + struct flowi4 fl4 = {}; + __be32 saddr = iph->saddr; + __u8 flags; +- struct net_device *dev = skb_dst(skb)->dev; + struct flow_keys flkeys; + unsigned int hh_len; + +@@ -74,7 +74,7 @@ int ip_route_me_harder(struct net *net, + #endif + + /* Change in oif may mean change in hh_len. 
*/ +- hh_len = skb_dst(skb)->dev->hard_header_len; ++ hh_len = skb_dst_dev(skb)->hard_header_len; + if (skb_headroom(skb) < hh_len && + pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), + 0, GFP_ATOMIC)) +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -413,7 +413,7 @@ static struct neighbour *ipv4_neigh_look + const void *daddr) + { + const struct rtable *rt = container_of(dst, struct rtable, dst); +- struct net_device *dev = dst->dev; ++ struct net_device *dev = dst_dev(dst); + struct neighbour *n; + + rcu_read_lock(); +@@ -440,7 +440,7 @@ static struct neighbour *ipv4_neigh_look + static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) + { + const struct rtable *rt = container_of(dst, struct rtable, dst); +- struct net_device *dev = dst->dev; ++ struct net_device *dev = dst_dev(dst); + const __be32 *pkey = daddr; + + if (rt->rt_gw_family == AF_INET) { +@@ -1025,7 +1025,7 @@ static void __ip_rt_update_pmtu(struct r + return; + + rcu_read_lock(); +- net = dev_net_rcu(dst->dev); ++ net = dev_net_rcu(dst_dev(dst)); + if (mtu < net->ipv4.ip_rt_min_pmtu) { + lock = true; + mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu); +@@ -1323,7 +1323,7 @@ static unsigned int ipv4_default_advmss( + struct net *net; + + rcu_read_lock(); +- net = dev_net_rcu(dst->dev); ++ net = dev_net_rcu(dst_dev(dst)); + advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, + net->ipv4.ip_rt_min_advmss); + rcu_read_unlock(); +--- a/net/ipv4/tcp_fastopen.c ++++ b/net/ipv4/tcp_fastopen.c +@@ -558,6 +558,7 @@ bool tcp_fastopen_active_should_disable( + void tcp_fastopen_active_disable_ofo_check(struct sock *sk) + { + struct tcp_sock *tp = tcp_sk(sk); ++ struct net_device *dev; + struct dst_entry *dst; + struct sk_buff *skb; + +@@ -575,7 +576,8 @@ void tcp_fastopen_active_disable_ofo_che + } else if (tp->syn_fastopen_ch && + atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) { + dst = sk_dst_get(sk); +- if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))) ++ dev = dst ? dst_dev(dst) : NULL; ++ if (!(dev && (dev->flags & IFF_LOOPBACK))) + atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0); + dst_release(dst); + } +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -786,7 +786,7 @@ static void tcp_v4_send_reset(const stru + arg.iov[0].iov_base = (unsigned char *)&rep; + arg.iov[0].iov_len = sizeof(rep.th); + +- net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev); ++ net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); + + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, &aoh)) +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -166,11 +166,11 @@ static struct tcp_metrics_block *tcpm_ne + unsigned int hash) + { + struct tcp_metrics_block *tm; +- struct net *net; + bool reclaim = false; ++ struct net *net; + + spin_lock_bh(&tcp_metrics_lock); +- net = dev_net_rcu(dst->dev); ++ net = dev_net_rcu(dst_dev(dst)); + + /* While waiting for the spin-lock the cache might have been populated + * with this entry and so we have to check again. 
+@@ -273,7 +273,7 @@ static struct tcp_metrics_block *__tcp_g + return NULL; + } + +- net = dev_net_rcu(dst->dev); ++ net = dev_net_rcu(dst_dev(dst)); + hash ^= net_hash_mix(net); + hash = hash_32(hash, tcp_metrics_hash_log); + +@@ -318,7 +318,7 @@ static struct tcp_metrics_block *tcp_get + else + return NULL; + +- net = dev_net_rcu(dst->dev); ++ net = dev_net_rcu(dst_dev(dst)); + hash ^= net_hash_mix(net); + hash = hash_32(hash, tcp_metrics_hash_log); + +--- a/net/ipv4/xfrm4_output.c ++++ b/net/ipv4/xfrm4_output.c +@@ -31,7 +31,7 @@ static int __xfrm4_output(struct net *ne + int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) + { + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, +- net, sk, skb, skb->dev, skb_dst(skb)->dev, ++ net, sk, skb, skb->dev, skb_dst_dev(skb), + __xfrm4_output, + !(IPCB(skb)->flags & IPSKB_REROUTED)); + } diff --git a/queue-6.12/ixgbevf-add-support-for-intel-r-e610-device.patch b/queue-6.12/ixgbevf-add-support-for-intel-r-e610-device.patch new file mode 100644 index 0000000000..8642e7d925 --- /dev/null +++ b/queue-6.12/ixgbevf-add-support-for-intel-r-e610-device.patch @@ -0,0 +1,171 @@ +From stable+bounces-188208-greg=kroah.com@vger.kernel.org Mon Oct 20 19:28:52 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 13:28:39 -0400 +Subject: ixgbevf: Add support for Intel(R) E610 device +To: stable@vger.kernel.org +Cc: Piotr Kwapulinski , Przemek Kitszel , Simon Horman , Rafal Romanowski , Tony Nguyen , Sasha Levin +Message-ID: <20251020172841.1850940-2-sashal@kernel.org> + +From: Piotr Kwapulinski + +[ Upstream commit 4c44b450c69b676955c2790dcf467c1f969d80f1 ] + +Add support for Intel(R) E610 Series of network devices. The E610 +is based on X550 but adds firmware managed link, enhanced security +capabilities and support for updated server manageability + +Reviewed-by: Przemek Kitszel +Signed-off-by: Piotr Kwapulinski +Reviewed-by: Simon Horman +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Stable-dep-of: a7075f501bd3 ("ixgbevf: fix mailbox API compatibility by negotiating supported features") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/ixgbevf/defines.h | 5 ++++- + drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 6 +++++- + drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 12 ++++++++++-- + drivers/net/ethernet/intel/ixgbevf/vf.c | 12 +++++++++++- + drivers/net/ethernet/intel/ixgbevf/vf.h | 4 +++- + 5 files changed, 33 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/intel/ixgbevf/defines.h ++++ b/drivers/net/ethernet/intel/ixgbevf/defines.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: GPL-2.0 */ +-/* Copyright(c) 1999 - 2018 Intel Corporation. */ ++/* Copyright(c) 1999 - 2024 Intel Corporation. */ + + #ifndef _IXGBEVF_DEFINES_H_ + #define _IXGBEVF_DEFINES_H_ +@@ -16,6 +16,9 @@ + #define IXGBE_DEV_ID_X550_VF_HV 0x1564 + #define IXGBE_DEV_ID_X550EM_X_VF_HV 0x15A9 + ++#define IXGBE_DEV_ID_E610_VF 0x57AD ++#define IXGBE_SUBDEV_ID_E610_VF_HV 0x00FF ++ + #define IXGBE_VF_IRQ_CLEAR_MASK 7 + #define IXGBE_VF_MAX_TX_QUEUES 8 + #define IXGBE_VF_MAX_RX_QUEUES 8 +--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h ++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: GPL-2.0 */ +-/* Copyright(c) 1999 - 2018 Intel Corporation. */ ++/* Copyright(c) 1999 - 2024 Intel Corporation. 
*/ + + #ifndef _IXGBEVF_H_ + #define _IXGBEVF_H_ +@@ -418,6 +418,8 @@ enum ixgbevf_boards { + board_X550EM_x_vf, + board_X550EM_x_vf_hv, + board_x550em_a_vf, ++ board_e610_vf, ++ board_e610_vf_hv, + }; + + enum ixgbevf_xcast_modes { +@@ -434,11 +436,13 @@ extern const struct ixgbevf_info ixgbevf + extern const struct ixgbe_mbx_operations ixgbevf_mbx_ops; + extern const struct ixgbe_mbx_operations ixgbevf_mbx_ops_legacy; + extern const struct ixgbevf_info ixgbevf_x550em_a_vf_info; ++extern const struct ixgbevf_info ixgbevf_e610_vf_info; + + extern const struct ixgbevf_info ixgbevf_82599_vf_hv_info; + extern const struct ixgbevf_info ixgbevf_X540_vf_hv_info; + extern const struct ixgbevf_info ixgbevf_X550_vf_hv_info; + extern const struct ixgbevf_info ixgbevf_X550EM_x_vf_hv_info; ++extern const struct ixgbevf_info ixgbevf_e610_vf_hv_info; + extern const struct ixgbe_mbx_operations ixgbevf_hv_mbx_ops; + + /* needed by ethtool.c */ +--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c ++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +@@ -1,5 +1,5 @@ + // SPDX-License-Identifier: GPL-2.0 +-/* Copyright(c) 1999 - 2018 Intel Corporation. */ ++/* Copyright(c) 1999 - 2024 Intel Corporation. */ + + /****************************************************************************** + Copyright (c)2006 - 2007 Myricom, Inc. for some LRO specific code +@@ -39,7 +39,7 @@ static const char ixgbevf_driver_string[ + "Intel(R) 10 Gigabit PCI Express Virtual Function Network Driver"; + + static char ixgbevf_copyright[] = +- "Copyright (c) 2009 - 2018 Intel Corporation."; ++ "Copyright (c) 2009 - 2024 Intel Corporation."; + + static const struct ixgbevf_info *ixgbevf_info_tbl[] = { + [board_82599_vf] = &ixgbevf_82599_vf_info, +@@ -51,6 +51,8 @@ static const struct ixgbevf_info *ixgbev + [board_X550EM_x_vf] = &ixgbevf_X550EM_x_vf_info, + [board_X550EM_x_vf_hv] = &ixgbevf_X550EM_x_vf_hv_info, + [board_x550em_a_vf] = &ixgbevf_x550em_a_vf_info, ++ [board_e610_vf] = &ixgbevf_e610_vf_info, ++ [board_e610_vf_hv] = &ixgbevf_e610_vf_hv_info, + }; + + /* ixgbevf_pci_tbl - PCI Device ID Table +@@ -71,6 +73,9 @@ static const struct pci_device_id ixgbev + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_VF), board_X550EM_x_vf }, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV), board_X550EM_x_vf_hv}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_VF), board_x550em_a_vf }, ++ {PCI_VDEVICE_SUB(INTEL, IXGBE_DEV_ID_E610_VF, PCI_ANY_ID, ++ IXGBE_SUBDEV_ID_E610_VF_HV), board_e610_vf_hv}, ++ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_E610_VF), board_e610_vf}, + /* required last entry */ + {0, } + }; +@@ -4693,6 +4698,9 @@ static int ixgbevf_probe(struct pci_dev + case ixgbe_mac_X540_vf: + dev_info(&pdev->dev, "Intel(R) X540 Virtual Function\n"); + break; ++ case ixgbe_mac_e610_vf: ++ dev_info(&pdev->dev, "Intel(R) E610 Virtual Function\n"); ++ break; + case ixgbe_mac_82599_vf: + default: + dev_info(&pdev->dev, "Intel(R) 82599 Virtual Function\n"); +--- a/drivers/net/ethernet/intel/ixgbevf/vf.c ++++ b/drivers/net/ethernet/intel/ixgbevf/vf.c +@@ -1,5 +1,5 @@ + // SPDX-License-Identifier: GPL-2.0 +-/* Copyright(c) 1999 - 2018 Intel Corporation. */ ++/* Copyright(c) 1999 - 2024 Intel Corporation. 
*/ + + #include "vf.h" + #include "ixgbevf.h" +@@ -1076,3 +1076,13 @@ const struct ixgbevf_info ixgbevf_x550em + .mac = ixgbe_mac_x550em_a_vf, + .mac_ops = &ixgbevf_mac_ops, + }; ++ ++const struct ixgbevf_info ixgbevf_e610_vf_info = { ++ .mac = ixgbe_mac_e610_vf, ++ .mac_ops = &ixgbevf_mac_ops, ++}; ++ ++const struct ixgbevf_info ixgbevf_e610_vf_hv_info = { ++ .mac = ixgbe_mac_e610_vf, ++ .mac_ops = &ixgbevf_hv_mac_ops, ++}; +--- a/drivers/net/ethernet/intel/ixgbevf/vf.h ++++ b/drivers/net/ethernet/intel/ixgbevf/vf.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: GPL-2.0 */ +-/* Copyright(c) 1999 - 2018 Intel Corporation. */ ++/* Copyright(c) 1999 - 2024 Intel Corporation. */ + + #ifndef __IXGBE_VF_H__ + #define __IXGBE_VF_H__ +@@ -54,6 +54,8 @@ enum ixgbe_mac_type { + ixgbe_mac_X550_vf, + ixgbe_mac_X550EM_x_vf, + ixgbe_mac_x550em_a_vf, ++ ixgbe_mac_e610, ++ ixgbe_mac_e610_vf, + ixgbe_num_macs + }; + diff --git a/queue-6.12/ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch b/queue-6.12/ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch new file mode 100644 index 0000000000..12bbe6c280 --- /dev/null +++ b/queue-6.12/ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch @@ -0,0 +1,306 @@ +From stable+bounces-188209-greg=kroah.com@vger.kernel.org Mon Oct 20 19:28:52 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 13:28:40 -0400 +Subject: ixgbevf: fix getting link speed data for E610 devices +To: stable@vger.kernel.org +Cc: Jedrzej Jagielski , Andrzej Wilczynski , Przemek Kitszel , Aleksandr Loktionov , Rafal Romanowski , Jacob Keller , Jakub Kicinski , Sasha Levin +Message-ID: <20251020172841.1850940-3-sashal@kernel.org> + +From: Jedrzej Jagielski + +[ Upstream commit 53f0eb62b4d23d40686f2dd51776b8220f2887bb ] + +E610 adapters no longer use the VFLINKS register to read PF's link +speed and linkup state. As a result VF driver cannot get actual link +state and it incorrectly reports 10G which is the default option. +It leads to a situation where even 1G adapters print 10G as actual +link speed. The same happens when PF driver set speed different than 10G. + +Add new mailbox operation to let the VF driver request a PF driver +to provide actual link data. Update the mailbox api to v1.6. + +Incorporate both ways of getting link status within the legacy +ixgbe_check_mac_link_vf() function. 
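For illustration only (not part of the patch), the resulting dispatch in
the link-check path: E610 VFs ask the PF over the mailbox, all other MACs
keep reading VFLINKS. The wrapper name is hypothetical; both callees appear
in the hunks below:

static int example_query_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
			      bool *link_up)
{
	/* E610 has firmware-managed link; VFLINKS no longer reflects it */
	if (hw->mac.type == ixgbe_mac_e610_vf)
		return ixgbevf_get_pf_link_state(hw, speed, link_up);

	ixgbe_read_vflinks(hw, speed, link_up);	/* legacy register path */
	return 0;
}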
+ +Fixes: 4c44b450c69b ("ixgbevf: Add support for Intel(R) E610 device") +Co-developed-by: Andrzej Wilczynski +Signed-off-by: Andrzej Wilczynski +Reviewed-by: Przemek Kitszel +Reviewed-by: Aleksandr Loktionov +Cc: stable@vger.kernel.org +Signed-off-by: Jedrzej Jagielski +Tested-by: Rafal Romanowski +Signed-off-by: Jacob Keller +Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-2-ef32a425b92a@intel.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: a7075f501bd3 ("ixgbevf: fix mailbox API compatibility by negotiating supported features") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/ixgbevf/defines.h | 1 + drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 6 + drivers/net/ethernet/intel/ixgbevf/mbx.h | 4 + drivers/net/ethernet/intel/ixgbevf/vf.c | 137 +++++++++++++++++----- + 4 files changed, 116 insertions(+), 32 deletions(-) + +--- a/drivers/net/ethernet/intel/ixgbevf/defines.h ++++ b/drivers/net/ethernet/intel/ixgbevf/defines.h +@@ -28,6 +28,7 @@ + + /* Link speed */ + typedef u32 ixgbe_link_speed; ++#define IXGBE_LINK_SPEED_UNKNOWN 0 + #define IXGBE_LINK_SPEED_1GB_FULL 0x0020 + #define IXGBE_LINK_SPEED_10GB_FULL 0x0080 + #define IXGBE_LINK_SPEED_100_FULL 0x0008 +--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c ++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +@@ -2278,6 +2278,7 @@ static void ixgbevf_negotiate_api(struct + { + struct ixgbe_hw *hw = &adapter->hw; + static const int api[] = { ++ ixgbe_mbox_api_16, + ixgbe_mbox_api_15, + ixgbe_mbox_api_14, + ixgbe_mbox_api_13, +@@ -2297,7 +2298,8 @@ static void ixgbevf_negotiate_api(struct + idx++; + } + +- if (hw->api_version >= ixgbe_mbox_api_15) { ++ /* Following is not supported by API 1.6, it is specific for 1.5 */ ++ if (hw->api_version == ixgbe_mbox_api_15) { + hw->mbx.ops.init_params(hw); + memcpy(&hw->mbx.ops, &ixgbevf_mbx_ops, + sizeof(struct ixgbe_mbx_operations)); +@@ -2654,6 +2656,7 @@ static void ixgbevf_set_num_queues(struc + case ixgbe_mbox_api_13: + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: ++ case ixgbe_mbox_api_16: + if (adapter->xdp_prog && + hw->mac.max_tx_queues == rss) + rss = rss > 3 ? 
2 : 1; +@@ -4648,6 +4651,7 @@ static int ixgbevf_probe(struct pci_dev + case ixgbe_mbox_api_13: + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: ++ case ixgbe_mbox_api_16: + netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - + (ETH_HLEN + ETH_FCS_LEN); + break; +--- a/drivers/net/ethernet/intel/ixgbevf/mbx.h ++++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h +@@ -66,6 +66,7 @@ enum ixgbe_pfvf_api_rev { + ixgbe_mbox_api_13, /* API version 1.3, linux/freebsd VF driver */ + ixgbe_mbox_api_14, /* API version 1.4, linux/freebsd VF driver */ + ixgbe_mbox_api_15, /* API version 1.5, linux/freebsd VF driver */ ++ ixgbe_mbox_api_16, /* API version 1.6, linux/freebsd VF driver */ + /* This value should always be last */ + ixgbe_mbox_api_unknown, /* indicates that API version is not known */ + }; +@@ -102,6 +103,9 @@ enum ixgbe_pfvf_api_rev { + + #define IXGBE_VF_GET_LINK_STATE 0x10 /* get vf link state */ + ++/* mailbox API, version 1.6 VF requests */ ++#define IXGBE_VF_GET_PF_LINK_STATE 0x11 /* request PF to send link info */ ++ + /* length of permanent address message returned from PF */ + #define IXGBE_VF_PERMADDR_MSG_LEN 4 + /* word in permanent address message with the current multicast type */ +--- a/drivers/net/ethernet/intel/ixgbevf/vf.c ++++ b/drivers/net/ethernet/intel/ixgbevf/vf.c +@@ -313,6 +313,7 @@ int ixgbevf_get_reta_locked(struct ixgbe + * is not supported for this device type. + */ + switch (hw->api_version) { ++ case ixgbe_mbox_api_16: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_13: +@@ -382,6 +383,7 @@ int ixgbevf_get_rss_key_locked(struct ix + * or if the operation is not supported for this device type. + */ + switch (hw->api_version) { ++ case ixgbe_mbox_api_16: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_13: +@@ -552,6 +554,7 @@ static s32 ixgbevf_update_xcast_mode(str + case ixgbe_mbox_api_13: + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: ++ case ixgbe_mbox_api_16: + break; + default: + return -EOPNOTSUPP; +@@ -625,6 +628,48 @@ static s32 ixgbevf_hv_get_link_state_vf( + } + + /** ++ * ixgbevf_get_pf_link_state - Get PF's link status ++ * @hw: pointer to the HW structure ++ * @speed: link speed ++ * @link_up: indicate if link is up/down ++ * ++ * Ask PF to provide link_up state and speed of the link. ++ * ++ * Return: IXGBE_ERR_MBX in the case of mailbox error, ++ * -EOPNOTSUPP if the op is not supported or 0 on success. ++ */ ++static int ixgbevf_get_pf_link_state(struct ixgbe_hw *hw, ixgbe_link_speed *speed, ++ bool *link_up) ++{ ++ u32 msgbuf[3] = {}; ++ int err; ++ ++ switch (hw->api_version) { ++ case ixgbe_mbox_api_16: ++ break; ++ default: ++ return -EOPNOTSUPP; ++ } ++ ++ msgbuf[0] = IXGBE_VF_GET_PF_LINK_STATE; ++ ++ err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, ++ ARRAY_SIZE(msgbuf)); ++ if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_FAILURE)) { ++ err = IXGBE_ERR_MBX; ++ *speed = IXGBE_LINK_SPEED_UNKNOWN; ++ /* No need to set @link_up to false as it will be done by ++ * ixgbe_check_mac_link_vf(). ++ */ ++ } else { ++ *speed = msgbuf[1]; ++ *link_up = msgbuf[2]; ++ } ++ ++ return err; ++} ++ ++/** + * ixgbevf_set_vfta_vf - Set/Unset VLAN filter table address + * @hw: pointer to the HW structure + * @vlan: 12 bit VLAN ID +@@ -659,6 +704,58 @@ mbx_err: + } + + /** ++ * ixgbe_read_vflinks - Read VFLINKS register ++ * @hw: pointer to the HW structure ++ * @speed: link speed ++ * @link_up: indicate if link is up/down ++ * ++ * Get linkup status and link speed from the VFLINKS register. 
++ */ ++static void ixgbe_read_vflinks(struct ixgbe_hw *hw, ixgbe_link_speed *speed, ++ bool *link_up) ++{ ++ u32 vflinks = IXGBE_READ_REG(hw, IXGBE_VFLINKS); ++ ++ /* if link status is down no point in checking to see if PF is up */ ++ if (!(vflinks & IXGBE_LINKS_UP)) { ++ *link_up = false; ++ return; ++ } ++ ++ /* for SFP+ modules and DA cables on 82599 it can take up to 500usecs ++ * before the link status is correct ++ */ ++ if (hw->mac.type == ixgbe_mac_82599_vf) { ++ for (int i = 0; i < 5; i++) { ++ udelay(100); ++ vflinks = IXGBE_READ_REG(hw, IXGBE_VFLINKS); ++ ++ if (!(vflinks & IXGBE_LINKS_UP)) { ++ *link_up = false; ++ return; ++ } ++ } ++ } ++ ++ /* We reached this point so there's link */ ++ *link_up = true; ++ ++ switch (vflinks & IXGBE_LINKS_SPEED_82599) { ++ case IXGBE_LINKS_SPEED_10G_82599: ++ *speed = IXGBE_LINK_SPEED_10GB_FULL; ++ break; ++ case IXGBE_LINKS_SPEED_1G_82599: ++ *speed = IXGBE_LINK_SPEED_1GB_FULL; ++ break; ++ case IXGBE_LINKS_SPEED_100_82599: ++ *speed = IXGBE_LINK_SPEED_100_FULL; ++ break; ++ default: ++ *speed = IXGBE_LINK_SPEED_UNKNOWN; ++ } ++} ++ ++/** + * ixgbevf_hv_set_vfta_vf - * Hyper-V variant - just a stub. + * @hw: unused + * @vlan: unused +@@ -705,7 +802,6 @@ static s32 ixgbevf_check_mac_link_vf(str + struct ixgbe_mbx_info *mbx = &hw->mbx; + struct ixgbe_mac_info *mac = &hw->mac; + s32 ret_val = 0; +- u32 links_reg; + u32 in_msg = 0; + + /* If we were hit with a reset drop the link */ +@@ -715,36 +811,14 @@ static s32 ixgbevf_check_mac_link_vf(str + if (!mac->get_link_status) + goto out; + +- /* if link status is down no point in checking to see if pf is up */ +- links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); +- if (!(links_reg & IXGBE_LINKS_UP)) +- goto out; +- +- /* for SFP+ modules and DA cables on 82599 it can take up to 500usecs +- * before the link status is correct +- */ +- if (mac->type == ixgbe_mac_82599_vf) { +- int i; +- +- for (i = 0; i < 5; i++) { +- udelay(100); +- links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); +- +- if (!(links_reg & IXGBE_LINKS_UP)) +- goto out; +- } +- } +- +- switch (links_reg & IXGBE_LINKS_SPEED_82599) { +- case IXGBE_LINKS_SPEED_10G_82599: +- *speed = IXGBE_LINK_SPEED_10GB_FULL; +- break; +- case IXGBE_LINKS_SPEED_1G_82599: +- *speed = IXGBE_LINK_SPEED_1GB_FULL; +- break; +- case IXGBE_LINKS_SPEED_100_82599: +- *speed = IXGBE_LINK_SPEED_100_FULL; +- break; ++ if (hw->mac.type == ixgbe_mac_e610_vf) { ++ ret_val = ixgbevf_get_pf_link_state(hw, speed, link_up); ++ if (ret_val) ++ goto out; ++ } else { ++ ixgbe_read_vflinks(hw, speed, link_up); ++ if (*link_up == false) ++ goto out; + } + + /* if the read failed it could just be a mailbox collision, best wait +@@ -951,6 +1025,7 @@ int ixgbevf_get_queues(struct ixgbe_hw * + case ixgbe_mbox_api_13: + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: ++ case ixgbe_mbox_api_16: + break; + default: + return 0; diff --git a/queue-6.12/ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch b/queue-6.12/ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch new file mode 100644 index 0000000000..0650c0f5bf --- /dev/null +++ b/queue-6.12/ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch @@ -0,0 +1,327 @@ +From stable+bounces-188210-greg=kroah.com@vger.kernel.org Mon Oct 20 19:28:53 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 13:28:41 -0400 +Subject: ixgbevf: fix mailbox API compatibility by negotiating supported features +To: stable@vger.kernel.org +Cc: Jedrzej Jagielski , Jacob Keller , 
Przemek Kitszel , Aleksandr Loktionov , Rafal Romanowski , Jakub Kicinski , Sasha Levin
+Message-ID: <20251020172841.1850940-4-sashal@kernel.org>
+
+From: Jedrzej Jagielski
+
+[ Upstream commit a7075f501bd33c93570af759b6f4302ef0175168 ]
+
+The mailbox API used to be backward compatible: various drivers from
+various OSes supporting 10G adapters from the Intel portfolio could
+easily negotiate the mailbox API.
+
+This convention has been broken since the introduction of API 1.4.
+Commit 0062e7cc955e ("ixgbevf: add VF IPsec offload code") added support
+for IPsec, which is specific to the kernel ixgbe driver. None of the
+rest of the Intel 10G PF/VF drivers supports it, and the lack of support
+was not accounted for in the IPsec implementation - there were no such
+code paths. No way to negotiate support for the feature was introduced
+along with the feature itself.
+
+Commit 339f28964147 ("ixgbevf: Add support for new mailbox communication
+between PF and VF"), which increased the API version to 1.5, did the
+same - it introduced code supported specifically by the PF ESX driver.
+It altered the API version for the VF driver while leaving the version
+defined for the PF ixgbe driver untouched. This led to additional
+discrepancies, as the code provided within API 1.6 cannot be supported
+by the Linux ixgbe driver: it causes crashes.
+
+The issue was noticed some time ago and mitigated by Jake in commit
+d0725312adf5 ("ixgbevf: stop attempting IPSEC offload on Mailbox API 1.5").
+As a result, IPsec support regressed, and after the API increase to
+version 1.6 the ixgbevf driver stopped supporting the ESX mailbox.
+
+To fix this mess, add a new mailbox op asking the PF driver about
+supported features. Based on the response, determine whether to enable
+support for IPsec and the ESX-specific enhanced mailbox.
+
+For compatibility purposes, the new mailbox op must be added in a new
+API revision, as the API version of the out-of-tree PF & VF drivers has
+already been increased to 1.6 and does not incorporate the features
+negotiation op.
+
+The feature negotiation mechanism can be extended with new features as
+needed in the future.
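+
+As an illustration of the negotiation scheme itself, here is a minimal
+user-space sketch; the FEAT_* names and pf_reply() are made up for the
+example and are not the driver's API. The handshake reduces to ANDing
+the two feature masks:
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define FEAT_IPSEC   (1u << 0) /* hypothetical: IPsec offload */
+#define FEAT_ESX_MBX (1u << 1) /* hypothetical: ESX enhanced mailbox */
+
+/* Stand-in for the PF's reply to the features-negotiate request. */
+static uint32_t pf_reply(uint32_t vf_wanted)
+{
+        uint32_t pf_supported = FEAT_ESX_MBX; /* e.g. an ESX PF */
+
+        return vf_wanted & pf_supported;
+}
+
+int main(void)
+{
+        uint32_t negotiated = pf_reply(FEAT_IPSEC | FEAT_ESX_MBX);
+
+        if (negotiated & FEAT_IPSEC)
+                printf("enable IPsec offload\n");
+        if (negotiated & FEAT_ESX_MBX)
+                printf("switch to the enhanced mailbox ops\n");
+        return 0;
+}
+
+Either side may support a superset of the other's features; the AND
+keeps both drivers on the common subset, which is what makes the op
+extensible.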
+ +Reported-by: Jacob Keller +Closes: https://lore.kernel.org/intel-wired-lan/20241101-jk-ixgbevf-mailbox-v1-5-fixes-v1-0-f556dc9a66ed@intel.com/ +Fixes: 0062e7cc955e ("ixgbevf: add VF IPsec offload code") +Fixes: 339f28964147 ("ixgbevf: Add support for new mailbox communication between PF and VF") +Reviewed-by: Jacob Keller +Reviewed-by: Przemek Kitszel +Reviewed-by: Aleksandr Loktionov +Cc: stable@vger.kernel.org +Signed-off-by: Jedrzej Jagielski +Tested-by: Rafal Romanowski +Signed-off-by: Jacob Keller +Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-4-ef32a425b92a@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/ixgbevf/ipsec.c | 10 ++++ + drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 7 +++ + drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 32 ++++++++++++++- + drivers/net/ethernet/intel/ixgbevf/mbx.h | 4 + + drivers/net/ethernet/intel/ixgbevf/vf.c | 45 +++++++++++++++++++++- + drivers/net/ethernet/intel/ixgbevf/vf.h | 1 + 6 files changed, 96 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/intel/ixgbevf/ipsec.c ++++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c +@@ -271,6 +271,9 @@ static int ixgbevf_ipsec_add_sa(struct x + adapter = netdev_priv(dev); + ipsec = adapter->ipsec; + ++ if (!(adapter->pf_features & IXGBEVF_PF_SUP_IPSEC)) ++ return -EOPNOTSUPP; ++ + if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol for IPsec offload"); + return -EINVAL; +@@ -400,6 +403,9 @@ static void ixgbevf_ipsec_del_sa(struct + adapter = netdev_priv(dev); + ipsec = adapter->ipsec; + ++ if (!(adapter->pf_features & IXGBEVF_PF_SUP_IPSEC)) ++ return; ++ + if (xs->xso.dir == XFRM_DEV_OFFLOAD_IN) { + sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_RX_INDEX; + +@@ -628,6 +634,10 @@ void ixgbevf_init_ipsec_offload(struct i + size_t size; + + switch (adapter->hw.api_version) { ++ case ixgbe_mbox_api_17: ++ if (!(adapter->pf_features & IXGBEVF_PF_SUP_IPSEC)) ++ return; ++ break; + case ixgbe_mbox_api_14: + break; + default: +--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h ++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +@@ -366,6 +366,13 @@ struct ixgbevf_adapter { + /* Interrupt Throttle Rate */ + u32 eitr_param; + ++ u32 pf_features; ++#define IXGBEVF_PF_SUP_IPSEC BIT(0) ++#define IXGBEVF_PF_SUP_ESX_MBX BIT(1) ++ ++#define IXGBEVF_SUPPORTED_FEATURES (IXGBEVF_PF_SUP_IPSEC | \ ++ IXGBEVF_PF_SUP_ESX_MBX) ++ + struct ixgbevf_hw_stats stats; + + unsigned long state; +--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c ++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +@@ -2274,10 +2274,35 @@ static void ixgbevf_init_last_counter_st + adapter->stats.base_vfmprc = adapter->stats.last_vfmprc; + } + ++/** ++ * ixgbevf_set_features - Set features supported by PF ++ * @adapter: pointer to the adapter struct ++ * ++ * Negotiate with PF supported features and then set pf_features accordingly. 
++ */ ++static void ixgbevf_set_features(struct ixgbevf_adapter *adapter) ++{ ++ u32 *pf_features = &adapter->pf_features; ++ struct ixgbe_hw *hw = &adapter->hw; ++ int err; ++ ++ err = hw->mac.ops.negotiate_features(hw, pf_features); ++ if (err && err != -EOPNOTSUPP) ++ netdev_dbg(adapter->netdev, ++ "PF feature negotiation failed.\n"); ++ ++ /* Address also pre API 1.7 cases */ ++ if (hw->api_version == ixgbe_mbox_api_14) ++ *pf_features |= IXGBEVF_PF_SUP_IPSEC; ++ else if (hw->api_version == ixgbe_mbox_api_15) ++ *pf_features |= IXGBEVF_PF_SUP_ESX_MBX; ++} ++ + static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) + { + struct ixgbe_hw *hw = &adapter->hw; + static const int api[] = { ++ ixgbe_mbox_api_17, + ixgbe_mbox_api_16, + ixgbe_mbox_api_15, + ixgbe_mbox_api_14, +@@ -2298,8 +2323,9 @@ static void ixgbevf_negotiate_api(struct + idx++; + } + +- /* Following is not supported by API 1.6, it is specific for 1.5 */ +- if (hw->api_version == ixgbe_mbox_api_15) { ++ ixgbevf_set_features(adapter); ++ ++ if (adapter->pf_features & IXGBEVF_PF_SUP_ESX_MBX) { + hw->mbx.ops.init_params(hw); + memcpy(&hw->mbx.ops, &ixgbevf_mbx_ops, + sizeof(struct ixgbe_mbx_operations)); +@@ -2657,6 +2683,7 @@ static void ixgbevf_set_num_queues(struc + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: ++ case ixgbe_mbox_api_17: + if (adapter->xdp_prog && + hw->mac.max_tx_queues == rss) + rss = rss > 3 ? 2 : 1; +@@ -4652,6 +4679,7 @@ static int ixgbevf_probe(struct pci_dev + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: ++ case ixgbe_mbox_api_17: + netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - + (ETH_HLEN + ETH_FCS_LEN); + break; +--- a/drivers/net/ethernet/intel/ixgbevf/mbx.h ++++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h +@@ -67,6 +67,7 @@ enum ixgbe_pfvf_api_rev { + ixgbe_mbox_api_14, /* API version 1.4, linux/freebsd VF driver */ + ixgbe_mbox_api_15, /* API version 1.5, linux/freebsd VF driver */ + ixgbe_mbox_api_16, /* API version 1.6, linux/freebsd VF driver */ ++ ixgbe_mbox_api_17, /* API version 1.7, linux/freebsd VF driver */ + /* This value should always be last */ + ixgbe_mbox_api_unknown, /* indicates that API version is not known */ + }; +@@ -106,6 +107,9 @@ enum ixgbe_pfvf_api_rev { + /* mailbox API, version 1.6 VF requests */ + #define IXGBE_VF_GET_PF_LINK_STATE 0x11 /* request PF to send link info */ + ++/* mailbox API, version 1.7 VF requests */ ++#define IXGBE_VF_FEATURES_NEGOTIATE 0x12 /* get features supported by PF*/ ++ + /* length of permanent address message returned from PF */ + #define IXGBE_VF_PERMADDR_MSG_LEN 4 + /* word in permanent address message with the current multicast type */ +--- a/drivers/net/ethernet/intel/ixgbevf/vf.c ++++ b/drivers/net/ethernet/intel/ixgbevf/vf.c +@@ -313,6 +313,7 @@ int ixgbevf_get_reta_locked(struct ixgbe + * is not supported for this device type. + */ + switch (hw->api_version) { ++ case ixgbe_mbox_api_17: + case ixgbe_mbox_api_16: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_14: +@@ -383,6 +384,7 @@ int ixgbevf_get_rss_key_locked(struct ix + * or if the operation is not supported for this device type. 
+ */ + switch (hw->api_version) { ++ case ixgbe_mbox_api_17: + case ixgbe_mbox_api_16: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_14: +@@ -555,6 +557,7 @@ static s32 ixgbevf_update_xcast_mode(str + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: ++ case ixgbe_mbox_api_17: + break; + default: + return -EOPNOTSUPP; +@@ -646,6 +649,7 @@ static int ixgbevf_get_pf_link_state(str + + switch (hw->api_version) { + case ixgbe_mbox_api_16: ++ case ixgbe_mbox_api_17: + break; + default: + return -EOPNOTSUPP; +@@ -670,6 +674,42 @@ static int ixgbevf_get_pf_link_state(str + } + + /** ++ * ixgbevf_negotiate_features_vf - negotiate supported features with PF driver ++ * @hw: pointer to the HW structure ++ * @pf_features: bitmask of features supported by PF ++ * ++ * Return: IXGBE_ERR_MBX in the case of mailbox error, ++ * -EOPNOTSUPP if the op is not supported or 0 on success. ++ */ ++static int ixgbevf_negotiate_features_vf(struct ixgbe_hw *hw, u32 *pf_features) ++{ ++ u32 msgbuf[2] = {}; ++ int err; ++ ++ switch (hw->api_version) { ++ case ixgbe_mbox_api_17: ++ break; ++ default: ++ return -EOPNOTSUPP; ++ } ++ ++ msgbuf[0] = IXGBE_VF_FEATURES_NEGOTIATE; ++ msgbuf[1] = IXGBEVF_SUPPORTED_FEATURES; ++ ++ err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, ++ ARRAY_SIZE(msgbuf)); ++ ++ if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_FAILURE)) { ++ err = IXGBE_ERR_MBX; ++ *pf_features = 0x0; ++ } else { ++ *pf_features = msgbuf[1]; ++ } ++ ++ return err; ++} ++ ++/** + * ixgbevf_set_vfta_vf - Set/Unset VLAN filter table address + * @hw: pointer to the HW structure + * @vlan: 12 bit VLAN ID +@@ -799,6 +839,7 @@ static s32 ixgbevf_check_mac_link_vf(str + bool *link_up, + bool autoneg_wait_to_complete) + { ++ struct ixgbevf_adapter *adapter = hw->back; + struct ixgbe_mbx_info *mbx = &hw->mbx; + struct ixgbe_mac_info *mac = &hw->mac; + s32 ret_val = 0; +@@ -825,7 +866,7 @@ static s32 ixgbevf_check_mac_link_vf(str + * until we are called again and don't report an error + */ + if (mbx->ops.read(hw, &in_msg, 1)) { +- if (hw->api_version >= ixgbe_mbox_api_15) ++ if (adapter->pf_features & IXGBEVF_PF_SUP_ESX_MBX) + mac->get_link_status = false; + goto out; + } +@@ -1026,6 +1067,7 @@ int ixgbevf_get_queues(struct ixgbe_hw * + case ixgbe_mbox_api_14: + case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: ++ case ixgbe_mbox_api_17: + break; + default: + return 0; +@@ -1080,6 +1122,7 @@ static const struct ixgbe_mac_operations + .setup_link = ixgbevf_setup_mac_link_vf, + .check_link = ixgbevf_check_mac_link_vf, + .negotiate_api_version = ixgbevf_negotiate_api_version_vf, ++ .negotiate_features = ixgbevf_negotiate_features_vf, + .set_rar = ixgbevf_set_rar_vf, + .update_mc_addr_list = ixgbevf_update_mc_addr_list_vf, + .update_xcast_mode = ixgbevf_update_xcast_mode, +--- a/drivers/net/ethernet/intel/ixgbevf/vf.h ++++ b/drivers/net/ethernet/intel/ixgbevf/vf.h +@@ -26,6 +26,7 @@ struct ixgbe_mac_operations { + s32 (*stop_adapter)(struct ixgbe_hw *); + s32 (*get_bus_info)(struct ixgbe_hw *); + s32 (*negotiate_api_version)(struct ixgbe_hw *hw, int api); ++ int (*negotiate_features)(struct ixgbe_hw *hw, u32 *pf_features); + + /* Link */ + s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool, bool); diff --git a/queue-6.12/md-fix-mssing-blktrace-bio-split-events.patch b/queue-6.12/md-fix-mssing-blktrace-bio-split-events.patch new file mode 100644 index 0000000000..61d0e4fa73 --- /dev/null +++ b/queue-6.12/md-fix-mssing-blktrace-bio-split-events.patch @@ -0,0 +1,142 @@ +From 
stable+bounces-188107-greg=kroah.com@vger.kernel.org Mon Oct 20 15:07:08 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 09:06:49 -0400 +Subject: md: fix mssing blktrace bio split events +To: stable@vger.kernel.org +Cc: Yu Kuai , Damien Le Moal , Christoph Hellwig , Jens Axboe , Sasha Levin +Message-ID: <20251020130649.1765603-4-sashal@kernel.org> + +From: Yu Kuai + +[ Upstream commit 22f166218f7313e8fe2d19213b5f4b3265f8c39e ] + +If bio is split by internal handling like chunksize or badblocks, the +corresponding trace_block_split() is missing, resulting in blktrace +inability to catch BIO split events and making it harder to analyze the +BIO sequence. + +Cc: stable@vger.kernel.org +Fixes: 4b1faf931650 ("block: Kill bio_pair_split()") +Signed-off-by: Yu Kuai +Reviewed-by: Damien Le Moal +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/md-linear.c | 1 + + drivers/md/raid0.c | 4 ++++ + drivers/md/raid1.c | 4 ++++ + drivers/md/raid10.c | 8 ++++++++ + drivers/md/raid5.c | 2 ++ + 5 files changed, 19 insertions(+) + +--- a/drivers/md/md-linear.c ++++ b/drivers/md/md-linear.c +@@ -267,6 +267,7 @@ static bool linear_make_request(struct m + } + + bio_chain(split, bio); ++ trace_block_split(split, bio->bi_iter.bi_sector); + submit_bio_noacct(bio); + bio = split; + } +--- a/drivers/md/raid0.c ++++ b/drivers/md/raid0.c +@@ -470,7 +470,9 @@ static void raid0_handle_discard(struct + bio_endio(bio); + return; + } ++ + bio_chain(split, bio); ++ trace_block_split(split, bio->bi_iter.bi_sector); + submit_bio_noacct(bio); + bio = split; + end = zone->zone_end; +@@ -618,7 +620,9 @@ static bool raid0_make_request(struct md + bio_endio(bio); + return true; + } ++ + bio_chain(split, bio); ++ trace_block_split(split, bio->bi_iter.bi_sector); + raid0_map_submit_bio(mddev, bio); + bio = split; + } +--- a/drivers/md/raid1.c ++++ b/drivers/md/raid1.c +@@ -1383,7 +1383,9 @@ static void raid1_read_request(struct md + error = PTR_ERR(split); + goto err_handle; + } ++ + bio_chain(split, bio); ++ trace_block_split(split, bio->bi_iter.bi_sector); + submit_bio_noacct(bio); + bio = split; + r1_bio->master_bio = bio; +@@ -1574,7 +1576,9 @@ static void raid1_write_request(struct m + error = PTR_ERR(split); + goto err_handle; + } ++ + bio_chain(split, bio); ++ trace_block_split(split, bio->bi_iter.bi_sector); + submit_bio_noacct(bio); + bio = split; + r1_bio->master_bio = bio; +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -1208,7 +1208,9 @@ static void raid10_read_request(struct m + error = PTR_ERR(split); + goto err_handle; + } ++ + bio_chain(split, bio); ++ trace_block_split(split, bio->bi_iter.bi_sector); + allow_barrier(conf); + submit_bio_noacct(bio); + wait_barrier(conf, false); +@@ -1484,7 +1486,9 @@ static void raid10_write_request(struct + error = PTR_ERR(split); + goto err_handle; + } ++ + bio_chain(split, bio); ++ trace_block_split(split, bio->bi_iter.bi_sector); + allow_barrier(conf); + submit_bio_noacct(bio); + wait_barrier(conf, false); +@@ -1669,7 +1673,9 @@ static int raid10_handle_discard(struct + bio_endio(bio); + return 0; + } ++ + bio_chain(split, bio); ++ trace_block_split(split, bio->bi_iter.bi_sector); + allow_barrier(conf); + /* Resend the fist split part */ + submit_bio_noacct(split); +@@ -1684,7 +1690,9 @@ static int raid10_handle_discard(struct + bio_endio(bio); + return 0; + } ++ + bio_chain(split, bio); ++ trace_block_split(split, bio->bi_iter.bi_sector); + allow_barrier(conf); + /* Resend the 
second split part */
+ submit_bio_noacct(bio);
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -5484,8 +5484,10 @@ static struct bio *chunk_aligned_read(st
+
+ if (sectors < bio_sectors(raid_bio)) {
+ struct r5conf *conf = mddev->private;
++
+ split = bio_split(raid_bio, sectors, GFP_NOIO, &conf->bio_split);
+ bio_chain(split, raid_bio);
++ trace_block_split(split, raid_bio->bi_iter.bi_sector);
+ submit_bio_noacct(raid_bio);
+ raid_bio = split;
+ }
diff --git a/queue-6.12/md-raid0-handle-bio_split-errors.patch b/queue-6.12/md-raid0-handle-bio_split-errors.patch
new file mode 100644
index 0000000000..de3a5a2acc
--- /dev/null
+++ b/queue-6.12/md-raid0-handle-bio_split-errors.patch
@@ -0,0 +1,55 @@
+From stable+bounces-188104-greg=kroah.com@vger.kernel.org Mon Oct 20 15:07:00 2025
+From: Sasha Levin
+Date: Mon, 20 Oct 2025 09:06:46 -0400
+Subject: md/raid0: Handle bio_split() errors
+To: stable@vger.kernel.org
+Cc: John Garry , Yu Kuai , Hannes Reinecke , Jens Axboe , Sasha Levin
+Message-ID: <20251020130649.1765603-1-sashal@kernel.org>
+
+From: John Garry
+
+[ Upstream commit 74538fdac3e85aae55eb4ed786478ed2384cb85d ]
+
+Add proper bio_split() error handling. For any error, set bi_status, end
+the bio, and return.
+
+Reviewed-by: Yu Kuai
+Reviewed-by: Hannes Reinecke
+Signed-off-by: John Garry
+Link: https://lore.kernel.org/r/20241111112150.3756529-5-john.g.garry@oracle.com
+Signed-off-by: Jens Axboe
+Stable-dep-of: 22f166218f73 ("md: fix mssing blktrace bio split events")
+Signed-off-by: Sasha Levin
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/md/raid0.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/drivers/md/raid0.c
++++ b/drivers/md/raid0.c
+@@ -464,6 +464,12 @@ static void raid0_handle_discard(struct
+ struct bio *split = bio_split(bio,
+ zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
+ &mddev->bio_set);
++
++ if (IS_ERR(split)) {
++ bio->bi_status = errno_to_blk_status(PTR_ERR(split));
++ bio_endio(bio);
++ return;
++ }
+ bio_chain(split, bio);
+ submit_bio_noacct(bio);
+ bio = split;
+@@ -606,6 +612,12 @@ static bool raid0_make_request(struct md
+ if (sectors < bio_sectors(bio)) {
+ struct bio *split = bio_split(bio, sectors, GFP_NOIO,
+ &mddev->bio_set);
++
++ if (IS_ERR(split)) {
++ bio->bi_status = errno_to_blk_status(PTR_ERR(split));
++ bio_endio(bio);
++ return true;
++ }
+ bio_chain(split, bio);
+ raid0_map_submit_bio(mddev, bio);
+ bio = split;
diff --git a/queue-6.12/md-raid1-handle-bio_split-errors.patch b/queue-6.12/md-raid1-handle-bio_split-errors.patch
new file mode 100644
index 0000000000..ee2baa1b5e
--- /dev/null
+++ b/queue-6.12/md-raid1-handle-bio_split-errors.patch
@@ -0,0 +1,110 @@
+From stable+bounces-188105-greg=kroah.com@vger.kernel.org Mon Oct 20 15:08:36 2025
+From: Sasha Levin
+Date: Mon, 20 Oct 2025 09:06:47 -0400
+Subject: md/raid1: Handle bio_split() errors
+To: stable@vger.kernel.org
+Cc: John Garry , Yu Kuai , Hannes Reinecke , Jens Axboe , Sasha Levin
+Message-ID: <20251020130649.1765603-2-sashal@kernel.org>
+
+From: John Garry
+
+[ Upstream commit b1a7ad8b5c4fa28325ee7b369a2d545d3e16ccde ]
+
+Add proper bio_split() error handling. For any error, call
+raid_end_bio_io() and return.
+
+For the case of an error in the write path, we need to undo the
+increment in the rdev pending count and NULLify the r1_bio->bios[]
+pointers.
+
+For read path failure, we need to undo rdev pending count increment from
+the earlier read_balance() call.
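+
+The IS_ERR()/PTR_ERR() pattern used throughout this series can be shown
+in isolation. Below is a stand-alone sketch with simplified stand-ins;
+it is not the kernel implementation, just the same control flow:
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Toy versions of the kernel's ERR_PTR()/IS_ERR()/PTR_ERR(). */
+static void *ERR_PTR(long err) { return (void *)err; }
+static int IS_ERR(const void *p)
+{
+        return (unsigned long)p >= (unsigned long)-4095;
+}
+static long PTR_ERR(const void *p) { return (long)p; }
+
+struct bio { int bi_status; };
+
+/* Fails the way bio_split() does on an invalid split size. */
+static struct bio *demo_split(int sectors)
+{
+        if (sectors <= 0)
+                return ERR_PTR(-EINVAL);
+        return calloc(1, sizeof(struct bio));
+}
+
+int main(void)
+{
+        struct bio *split = demo_split(0);
+
+        if (IS_ERR(split)) {
+                /* the patches set bi_status and end the bio here */
+                fprintf(stderr, "split failed: %ld\n", PTR_ERR(split));
+                return 1;
+        }
+        free(split);
+        return 0;
+}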
+ +Reviewed-by: Yu Kuai +Reviewed-by: Hannes Reinecke +Signed-off-by: John Garry +Link: https://lore.kernel.org/r/20241111112150.3756529-6-john.g.garry@oracle.com +Signed-off-by: Jens Axboe +Stable-dep-of: 22f166218f73 ("md: fix mssing blktrace bio split events") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/raid1.c | 33 +++++++++++++++++++++++++++++++-- + 1 file changed, 31 insertions(+), 2 deletions(-) + +--- a/drivers/md/raid1.c ++++ b/drivers/md/raid1.c +@@ -1317,7 +1317,7 @@ static void raid1_read_request(struct md + struct raid1_info *mirror; + struct bio *read_bio; + int max_sectors; +- int rdisk; ++ int rdisk, error; + bool r1bio_existed = !!r1_bio; + + /* +@@ -1378,6 +1378,11 @@ static void raid1_read_request(struct md + if (max_sectors < bio_sectors(bio)) { + struct bio *split = bio_split(bio, max_sectors, + gfp, &conf->bio_split); ++ ++ if (IS_ERR(split)) { ++ error = PTR_ERR(split); ++ goto err_handle; ++ } + bio_chain(split, bio); + submit_bio_noacct(bio); + bio = split; +@@ -1404,6 +1409,13 @@ static void raid1_read_request(struct md + read_bio->bi_private = r1_bio; + mddev_trace_remap(mddev, read_bio, r1_bio->sector); + submit_bio_noacct(read_bio); ++ return; ++ ++err_handle: ++ atomic_dec(&mirror->rdev->nr_pending); ++ bio->bi_status = errno_to_blk_status(error); ++ set_bit(R1BIO_Uptodate, &r1_bio->state); ++ raid_end_bio_io(r1_bio); + } + + static void raid1_write_request(struct mddev *mddev, struct bio *bio, +@@ -1411,7 +1423,7 @@ static void raid1_write_request(struct m + { + struct r1conf *conf = mddev->private; + struct r1bio *r1_bio; +- int i, disks; ++ int i, disks, k, error; + unsigned long flags; + struct md_rdev *blocked_rdev; + int first_clone; +@@ -1557,6 +1569,11 @@ static void raid1_write_request(struct m + if (max_sectors < bio_sectors(bio)) { + struct bio *split = bio_split(bio, max_sectors, + GFP_NOIO, &conf->bio_split); ++ ++ if (IS_ERR(split)) { ++ error = PTR_ERR(split); ++ goto err_handle; ++ } + bio_chain(split, bio); + submit_bio_noacct(bio); + bio = split; +@@ -1640,6 +1657,18 @@ static void raid1_write_request(struct m + + /* In case raid1d snuck in to freeze_array */ + wake_up_barrier(conf); ++ return; ++err_handle: ++ for (k = 0; k < i; k++) { ++ if (r1_bio->bios[k]) { ++ rdev_dec_pending(conf->mirrors[k].rdev, mddev); ++ r1_bio->bios[k] = NULL; ++ } ++ } ++ ++ bio->bi_status = errno_to_blk_status(error); ++ set_bit(R1BIO_Uptodate, &r1_bio->state); ++ raid_end_bio_io(r1_bio); + } + + static bool raid1_make_request(struct mddev *mddev, struct bio *bio) diff --git a/queue-6.12/md-raid10-handle-bio_split-errors.patch b/queue-6.12/md-raid10-handle-bio_split-errors.patch new file mode 100644 index 0000000000..769d8d2a86 --- /dev/null +++ b/queue-6.12/md-raid10-handle-bio_split-errors.patch @@ -0,0 +1,135 @@ +From stable+bounces-188106-greg=kroah.com@vger.kernel.org Mon Oct 20 15:07:02 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 09:06:48 -0400 +Subject: md/raid10: Handle bio_split() errors +To: stable@vger.kernel.org +Cc: John Garry , Yu Kuai , Hannes Reinecke , Jens Axboe , Sasha Levin +Message-ID: <20251020130649.1765603-3-sashal@kernel.org> + +From: John Garry + +[ Upstream commit 4cf58d9529097328b669e3c8693ed21e3a041903 ] + +Add proper bio_split() error handling. For any error, call +raid_end_bio_io() and return. Except for discard, where we end the bio +directly. 
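+
+The err_handle unwind added below releases only what was actually
+claimed. A toy model of the same idiom, with plain counters standing in
+for the rdev pending references (this is not the md code):
+
+#include <stdio.h>
+
+#define NDEV 4
+
+int main(void)
+{
+        int pending[NDEV] = {0};
+        int i, k;
+
+        for (i = 0; i < NDEV; i++) {
+                if (i == 2) /* pretend the split failed at device 2 */
+                        goto err_handle;
+                pending[i]++; /* counterpart of the pending-count grab */
+        }
+        return 0;
+
+err_handle:
+        for (k = 0; k < i; k++) /* undo devices 0..i-1, nothing more */
+                pending[k]--;
+        for (k = 0; k < NDEV; k++)
+                printf("dev %d pending %d\n", k, pending[k]);
+        return 1;
+}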
+ +Reviewed-by: Yu Kuai +Reviewed-by: Hannes Reinecke +Signed-off-by: John Garry +Link: https://lore.kernel.org/r/20241111112150.3756529-7-john.g.garry@oracle.com +Signed-off-by: Jens Axboe +Stable-dep-of: 22f166218f73 ("md: fix mssing blktrace bio split events") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/raid10.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 46 insertions(+), 1 deletion(-) + +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -1153,6 +1153,7 @@ static void raid10_read_request(struct m + int slot = r10_bio->read_slot; + struct md_rdev *err_rdev = NULL; + gfp_t gfp = GFP_NOIO; ++ int error; + + if (slot >= 0 && r10_bio->devs[slot].rdev) { + /* +@@ -1203,6 +1204,10 @@ static void raid10_read_request(struct m + if (max_sectors < bio_sectors(bio)) { + struct bio *split = bio_split(bio, max_sectors, + gfp, &conf->bio_split); ++ if (IS_ERR(split)) { ++ error = PTR_ERR(split); ++ goto err_handle; ++ } + bio_chain(split, bio); + allow_barrier(conf); + submit_bio_noacct(bio); +@@ -1233,6 +1238,11 @@ static void raid10_read_request(struct m + mddev_trace_remap(mddev, read_bio, r10_bio->sector); + submit_bio_noacct(read_bio); + return; ++err_handle: ++ atomic_dec(&rdev->nr_pending); ++ bio->bi_status = errno_to_blk_status(error); ++ set_bit(R10BIO_Uptodate, &r10_bio->state); ++ raid_end_bio_io(r10_bio); + } + + static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, +@@ -1341,9 +1351,10 @@ static void raid10_write_request(struct + struct r10bio *r10_bio) + { + struct r10conf *conf = mddev->private; +- int i; ++ int i, k; + sector_t sectors; + int max_sectors; ++ int error; + + if ((mddev_is_clustered(mddev) && + md_cluster_ops->area_resyncing(mddev, WRITE, +@@ -1469,6 +1480,10 @@ static void raid10_write_request(struct + if (r10_bio->sectors < bio_sectors(bio)) { + struct bio *split = bio_split(bio, r10_bio->sectors, + GFP_NOIO, &conf->bio_split); ++ if (IS_ERR(split)) { ++ error = PTR_ERR(split); ++ goto err_handle; ++ } + bio_chain(split, bio); + allow_barrier(conf); + submit_bio_noacct(bio); +@@ -1488,6 +1503,26 @@ static void raid10_write_request(struct + raid10_write_one_disk(mddev, r10_bio, bio, true, i); + } + one_write_done(r10_bio); ++ return; ++err_handle: ++ for (k = 0; k < i; k++) { ++ int d = r10_bio->devs[k].devnum; ++ struct md_rdev *rdev = conf->mirrors[d].rdev; ++ struct md_rdev *rrdev = conf->mirrors[d].replacement; ++ ++ if (r10_bio->devs[k].bio) { ++ rdev_dec_pending(rdev, mddev); ++ r10_bio->devs[k].bio = NULL; ++ } ++ if (r10_bio->devs[k].repl_bio) { ++ rdev_dec_pending(rrdev, mddev); ++ r10_bio->devs[k].repl_bio = NULL; ++ } ++ } ++ ++ bio->bi_status = errno_to_blk_status(error); ++ set_bit(R10BIO_Uptodate, &r10_bio->state); ++ raid_end_bio_io(r10_bio); + } + + static void __make_request(struct mddev *mddev, struct bio *bio, int sectors) +@@ -1629,6 +1664,11 @@ static int raid10_handle_discard(struct + if (remainder) { + split_size = stripe_size - remainder; + split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split); ++ if (IS_ERR(split)) { ++ bio->bi_status = errno_to_blk_status(PTR_ERR(split)); ++ bio_endio(bio); ++ return 0; ++ } + bio_chain(split, bio); + allow_barrier(conf); + /* Resend the fist split part */ +@@ -1639,6 +1679,11 @@ static int raid10_handle_discard(struct + if (remainder) { + split_size = bio_sectors(bio) - remainder; + split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split); ++ if (IS_ERR(split)) { ++ bio->bi_status = 
errno_to_blk_status(PTR_ERR(split)); ++ bio_endio(bio); ++ return 0; ++ } + bio_chain(split, bio); + allow_barrier(conf); + /* Resend the second split part */ diff --git a/queue-6.12/mptcp-call-dst_release-in-mptcp_active_enable.patch b/queue-6.12/mptcp-call-dst_release-in-mptcp_active_enable.patch new file mode 100644 index 0000000000..323e501fd0 --- /dev/null +++ b/queue-6.12/mptcp-call-dst_release-in-mptcp_active_enable.patch @@ -0,0 +1,42 @@ +From stable+bounces-188152-greg=kroah.com@vger.kernel.org Mon Oct 20 17:44:25 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 11:44:07 -0400 +Subject: mptcp: Call dst_release() in mptcp_active_enable(). +To: stable@vger.kernel.org +Cc: Kuniyuki Iwashima , "Matthieu Baerts (NGI0)" , Eric Dumazet , Jakub Kicinski , Sasha Levin +Message-ID: <20251020154409.1823664-6-sashal@kernel.org> + +From: Kuniyuki Iwashima + +[ Upstream commit 108a86c71c93ff28087994e6107bc99ebe336629 ] + +mptcp_active_enable() calls sk_dst_get(), which returns dst with its +refcount bumped, but forgot dst_release(). + +Let's add missing dst_release(). + +Cc: stable@vger.kernel.org +Fixes: 27069e7cb3d1 ("mptcp: disable active MPTCP in case of blackhole") +Signed-off-by: Kuniyuki Iwashima +Reviewed-by: Matthieu Baerts (NGI0) +Reviewed-by: Eric Dumazet +Link: https://patch.msgid.link/20250916214758.650211-7-kuniyu@google.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 833d4313bc1e ("mptcp: reset blackhole on success with non-loopback ifaces") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/ctrl.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/mptcp/ctrl.c ++++ b/net/mptcp/ctrl.c +@@ -385,6 +385,8 @@ void mptcp_active_enable(struct sock *sk + + if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)) + atomic_set(&pernet->active_disable_times, 0); ++ ++ dst_release(dst); + } + } + diff --git a/queue-6.12/mptcp-reset-blackhole-on-success-with-non-loopback-ifaces.patch b/queue-6.12/mptcp-reset-blackhole-on-success-with-non-loopback-ifaces.patch new file mode 100644 index 0000000000..4f81f20d4b --- /dev/null +++ b/queue-6.12/mptcp-reset-blackhole-on-success-with-non-loopback-ifaces.patch @@ -0,0 +1,52 @@ +From stable+bounces-188154-greg=kroah.com@vger.kernel.org Mon Oct 20 17:44:31 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 11:44:09 -0400 +Subject: mptcp: reset blackhole on success with non-loopback ifaces +To: stable@vger.kernel.org +Cc: "Matthieu Baerts (NGI0)" , Simon Horman , Kuniyuki Iwashima , Jakub Kicinski , Sasha Levin +Message-ID: <20251020154409.1823664-8-sashal@kernel.org> + +From: "Matthieu Baerts (NGI0)" + +[ Upstream commit 833d4313bc1e9e194814917d23e8874d6b651649 ] + +When a first MPTCP connection gets successfully established after a +blackhole period, 'active_disable_times' was supposed to be reset when +this connection was done via any non-loopback interfaces. + +Unfortunately, the opposite condition was checked: only reset when the +connection was established via a loopback interface. Fixing this by +simply looking at the opposite. + +This is similar to what is done with TCP FastOpen, see +tcp_fastopen_active_disable_ofo_check(). + +This patch is a follow-up of a previous discussion linked to commit +893c49a78d9f ("mptcp: Use __sk_dst_get() and dst_dev_rcu() in +mptcp_active_enable()."), see [1]. 
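+
+The fix is a straight negation of the old test. A small stand-alone
+check of the corrected predicate follows; the IFF_LOOPBACK value and the
+local struct are demo stand-ins, not the kernel definitions:
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#define IFF_LOOPBACK 0x8 /* demo value */
+
+struct net_device { unsigned int flags; };
+
+/* Reset the blackhole counter on any non-loopback (or unknown) device. */
+static bool should_reset(const struct net_device *dev)
+{
+        return !(dev && (dev->flags & IFF_LOOPBACK));
+}
+
+int main(void)
+{
+        struct net_device lo = { .flags = IFF_LOOPBACK };
+        struct net_device eth = { .flags = 0 };
+
+        printf("loopback: %d\n", should_reset(&lo));  /* 0: keep state */
+        printf("ethernet: %d\n", should_reset(&eth)); /* 1: reset */
+        printf("no dev:   %d\n", should_reset(NULL)); /* 1: reset */
+        return 0;
+}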
+ +Fixes: 27069e7cb3d1 ("mptcp: disable active MPTCP in case of blackhole") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/4209a283-8822-47bd-95b7-87e96d9b7ea3@kernel.org [1] +Signed-off-by: Matthieu Baerts (NGI0) +Reviewed-by: Simon Horman +Reviewed-by: Kuniyuki Iwashima +Link: https://patch.msgid.link/20250918-net-next-mptcp-blackhole-reset-loopback-v1-1-bf5818326639@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/ctrl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/mptcp/ctrl.c ++++ b/net/mptcp/ctrl.c +@@ -387,7 +387,7 @@ void mptcp_active_enable(struct sock *sk + rcu_read_lock(); + dst = __sk_dst_get(sk); + dev = dst ? dst_dev_rcu(dst) : NULL; +- if (dev && (dev->flags & IFF_LOOPBACK)) ++ if (!(dev && (dev->flags & IFF_LOOPBACK))) + atomic_set(&pernet->active_disable_times, 0); + rcu_read_unlock(); + } diff --git a/queue-6.12/mptcp-use-__sk_dst_get-and-dst_dev_rcu-in-mptcp_active_enable.patch b/queue-6.12/mptcp-use-__sk_dst_get-and-dst_dev_rcu-in-mptcp_active_enable.patch new file mode 100644 index 0000000000..6218c7cf44 --- /dev/null +++ b/queue-6.12/mptcp-use-__sk_dst_get-and-dst_dev_rcu-in-mptcp_active_enable.patch @@ -0,0 +1,55 @@ +From stable+bounces-188153-greg=kroah.com@vger.kernel.org Mon Oct 20 17:47:15 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 11:44:08 -0400 +Subject: mptcp: Use __sk_dst_get() and dst_dev_rcu() in mptcp_active_enable(). +To: stable@vger.kernel.org +Cc: Kuniyuki Iwashima , "Matthieu Baerts (NGI0)" , Eric Dumazet , Jakub Kicinski , Sasha Levin +Message-ID: <20251020154409.1823664-7-sashal@kernel.org> + +From: Kuniyuki Iwashima + +[ Upstream commit 893c49a78d9f85e4b8081b908fb7c407d018106a ] + +mptcp_active_enable() is called from subflow_finish_connect(), +which is icsk->icsk_af_ops->sk_rx_dst_set() and it's not always +under RCU. + +Using sk_dst_get(sk)->dev could trigger UAF. + +Let's use __sk_dst_get() and dst_dev_rcu(). + +Fixes: 27069e7cb3d1 ("mptcp: disable active MPTCP in case of blackhole") +Signed-off-by: Kuniyuki Iwashima +Reviewed-by: Matthieu Baerts (NGI0) +Reviewed-by: Eric Dumazet +Link: https://patch.msgid.link/20250916214758.650211-8-kuniyu@google.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 833d4313bc1e ("mptcp: reset blackhole on success with non-loopback ifaces") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/ctrl.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/net/mptcp/ctrl.c ++++ b/net/mptcp/ctrl.c +@@ -381,12 +381,15 @@ void mptcp_active_enable(struct sock *sk + struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk)); + + if (atomic_read(&pernet->active_disable_times)) { +- struct dst_entry *dst = sk_dst_get(sk); ++ struct net_device *dev; ++ struct dst_entry *dst; + +- if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)) ++ rcu_read_lock(); ++ dst = __sk_dst_get(sk); ++ dev = dst ? 
dst_dev_rcu(dst) : NULL;
++ if (dev && (dev->flags & IFF_LOOPBACK))
+ atomic_set(&pernet->active_disable_times, 0);
+-
+- dst_release(dst);
++ rcu_read_unlock();
+ }
+ }
+
diff --git a/queue-6.12/net-add-locking-to-protect-skb-dev-access-in-ip_output.patch b/queue-6.12/net-add-locking-to-protect-skb-dev-access-in-ip_output.patch
new file mode 100644
index 0000000000..336606f257
--- /dev/null
+++ b/queue-6.12/net-add-locking-to-protect-skb-dev-access-in-ip_output.patch
@@ -0,0 +1,121 @@
+From stable+bounces-188151-greg=kroah.com@vger.kernel.org Mon Oct 20 17:44:27 2025
+From: Sasha Levin
+Date: Mon, 20 Oct 2025 11:44:06 -0400
+Subject: net: Add locking to protect skb->dev access in ip_output
+To: stable@vger.kernel.org
+Cc: Sharath Chandra Vurukala , Eric Dumazet , Jakub Kicinski , Sasha Levin
+Message-ID: <20251020154409.1823664-5-sashal@kernel.org>
+
+From: Sharath Chandra Vurukala
+
+[ Upstream commit 1dbf1d590d10a6d1978e8184f8dfe20af22d680a ]
+
+In ip_output(), skb->dev is updated from skb_dst(skb)->dev; this can
+become invalid when the interface is unregistered and freed.
+
+Introduce a new skb_dst_dev_rcu() function to be used instead of
+skb_dst_dev() within RCU locks in ip_output(). This will ensure that
+all the skbs associated with the dev being deregistered will be
+transmitted out first, before freeing the dev.
+
+Given that ip_output() is called within an rcu_read_lock()
+critical section or from a bottom-half context, it is safe to introduce
+an RCU read-side critical section within it.
+
+Multiple panic call stacks were observed when UL traffic was run
+concurrently with device deregistration from different functions;
+one sample is pasted for reference.
+
+[496733.627565][T13385] Call trace:
+[496733.627570][T13385] bpf_prog_ce7c9180c3b128ea_cgroupskb_egres+0x24c/0x7f0
+[496733.627581][T13385] __cgroup_bpf_run_filter_skb+0x128/0x498
+[496733.627595][T13385] ip_finish_output+0xa4/0xf4
+[496733.627605][T13385] ip_output+0x100/0x1a0
+[496733.627613][T13385] ip_send_skb+0x68/0x100
+[496733.627618][T13385] udp_send_skb+0x1c4/0x384
+[496733.627625][T13385] udp_sendmsg+0x7b0/0x898
+[496733.627631][T13385] inet_sendmsg+0x5c/0x7c
+[496733.627639][T13385] __sys_sendto+0x174/0x1e4
+[496733.627647][T13385] __arm64_sys_sendto+0x28/0x3c
+[496733.627653][T13385] invoke_syscall+0x58/0x11c
+[496733.627662][T13385] el0_svc_common+0x88/0xf4
+[496733.627669][T13385] do_el0_svc+0x2c/0xb0
+[496733.627676][T13385] el0_svc+0x2c/0xa4
+[496733.627683][T13385] el0t_64_sync_handler+0x68/0xb4
+[496733.627689][T13385] el0t_64_sync+0x1a4/0x1a8
+
+Changes in v3:
+- Replaced WARN_ON() with WARN_ON_ONCE(), as suggested by Willem de Bruijn.
+- Dropped legacy lines mistakenly pulled in from an outdated branch.
+
+Changes in v2:
+- Addressed review comments from Eric Dumazet
+- Used READ_ONCE() to prevent potential load/store tearing
+- Added skb_dst_dev_rcu() and used along with rcu_read_lock() in ip_output
+
+Signed-off-by: Sharath Chandra Vurukala
+Reviewed-by: Eric Dumazet
+Link: https://patch.msgid.link/20250730105118.GA26100@hu-sharathv-hyd.qualcomm.com
+Signed-off-by: Jakub Kicinski
+Stable-dep-of: 833d4313bc1e ("mptcp: reset blackhole on success with non-loopback ifaces")
+Signed-off-by: Sasha Levin
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/net/dst.h | 12 ++++++++++++
+ net/ipv4/ip_output.c | 15 ++++++++++-----
+ 2 files changed, 22 insertions(+), 5 deletions(-)
+
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -566,11 +566,23 @@ static inline struct net_device *dst_dev
+ return READ_ONCE(dst->dev);
+ }
+
++static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst)
++{
++ /* In the future, use rcu_dereference(dst->dev) */
++ WARN_ON_ONCE(!rcu_read_lock_held());
++ return READ_ONCE(dst->dev);
++}
++
+ static inline struct net_device *skb_dst_dev(const struct sk_buff *skb)
+ {
+ return dst_dev(skb_dst(skb));
+ }
+
++static inline struct net_device *skb_dst_dev_rcu(const struct sk_buff *skb)
++{
++ return dst_dev_rcu(skb_dst(skb));
++}
++
+ static inline struct net *skb_dst_dev_net(const struct sk_buff *skb)
+ {
+ return dev_net(skb_dst_dev(skb));
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -426,15 +426,20 @@ int ip_mc_output(struct net *net, struct
+
+ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
+- struct net_device *dev = skb_dst_dev(skb), *indev = skb->dev;
++ struct net_device *dev, *indev = skb->dev;
++ int ret_val;
+
++ rcu_read_lock();
++ dev = skb_dst_dev_rcu(skb);
+ skb->dev = dev;
+ skb->protocol = htons(ETH_P_IP);
+
+- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+- net, sk, skb, indev, dev,
+- ip_finish_output,
+- !(IPCB(skb)->flags & IPSKB_REROUTED));
++ ret_val = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
++ net, sk, skb, indev, dev,
++ ip_finish_output,
++ !(IPCB(skb)->flags & IPSKB_REROUTED));
++ rcu_read_unlock();
++ return ret_val;
+ }
+ EXPORT_SYMBOL(ip_output);
+
diff --git a/queue-6.12/net-dst-add-four-helpers-to-annotate-data-races-around-dst-dev.patch b/queue-6.12/net-dst-add-four-helpers-to-annotate-data-races-around-dst-dev.patch
new file mode 100644
index 0000000000..cf5160f4e4
--- /dev/null
+++ b/queue-6.12/net-dst-add-four-helpers-to-annotate-data-races-around-dst-dev.patch
@@ -0,0 +1,135 @@
+From stable+bounces-188149-greg=kroah.com@vger.kernel.org Mon Oct 20 17:47:03 2025
+From: Sasha Levin
+Date: Mon, 20 Oct 2025 11:44:04 -0400
+Subject: net: dst: add four helpers to annotate data-races around dst->dev
+To: stable@vger.kernel.org
+Cc: Eric Dumazet , Kuniyuki Iwashima , Jakub Kicinski , Sasha Levin
+Message-ID: <20251020154409.1823664-3-sashal@kernel.org>
+
+From: Eric Dumazet
+
+[ Upstream commit 88fe14253e181878c2ddb51a298ae8c468a63010 ]
+
+dst->dev is read locklessly in many contexts,
+and written in dst_dev_put().
+
+Fixing all the races is going to need many changes.
+
+We probably will have to add full RCU protection.
+
+Add four helpers to ease this painful process.
+ +static inline struct net_device *dst_dev(const struct dst_entry *dst) +{ + return READ_ONCE(dst->dev); +} + +static inline struct net_device *skb_dst_dev(const struct sk_buff *skb) +{ + return dst_dev(skb_dst(skb)); +} + +static inline struct net *skb_dst_dev_net(const struct sk_buff *skb) +{ + return dev_net(skb_dst_dev(skb)); +} + +static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb) +{ + return dev_net_rcu(skb_dst_dev(skb)); +} + +Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") +Signed-off-by: Eric Dumazet +Reviewed-by: Kuniyuki Iwashima +Link: https://patch.msgid.link/20250630121934.3399505-7-edumazet@google.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 833d4313bc1e ("mptcp: reset blackhole on success with non-loopback ifaces") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + include/net/dst.h | 20 ++++++++++++++++++++ + net/core/dst.c | 4 ++-- + net/core/sock.c | 8 ++++---- + 3 files changed, 26 insertions(+), 6 deletions(-) + +--- a/include/net/dst.h ++++ b/include/net/dst.h +@@ -561,6 +561,26 @@ static inline void skb_dst_update_pmtu_n + dst->ops->update_pmtu(dst, NULL, skb, mtu, false); + } + ++static inline struct net_device *dst_dev(const struct dst_entry *dst) ++{ ++ return READ_ONCE(dst->dev); ++} ++ ++static inline struct net_device *skb_dst_dev(const struct sk_buff *skb) ++{ ++ return dst_dev(skb_dst(skb)); ++} ++ ++static inline struct net *skb_dst_dev_net(const struct sk_buff *skb) ++{ ++ return dev_net(skb_dst_dev(skb)); ++} ++ ++static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb) ++{ ++ return dev_net_rcu(skb_dst_dev(skb)); ++} ++ + struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie); + void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu, bool confirm_neigh); +--- a/net/core/dst.c ++++ b/net/core/dst.c +@@ -150,7 +150,7 @@ void dst_dev_put(struct dst_entry *dst) + dst->ops->ifdown(dst, dev); + WRITE_ONCE(dst->input, dst_discard); + WRITE_ONCE(dst->output, dst_discard_out); +- dst->dev = blackhole_netdev; ++ WRITE_ONCE(dst->dev, blackhole_netdev); + netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker, + GFP_ATOMIC); + } +@@ -263,7 +263,7 @@ unsigned int dst_blackhole_mtu(const str + { + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + +- return mtu ? : dst->dev->mtu; ++ return mtu ? : dst_dev(dst)->mtu; + } + EXPORT_SYMBOL_GPL(dst_blackhole_mtu); + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -2534,8 +2534,8 @@ static u32 sk_dst_gso_max_size(struct so + !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)); + #endif + /* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */ +- max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) : +- READ_ONCE(dst->dev->gso_ipv4_max_size); ++ max_size = is_ipv6 ? 
READ_ONCE(dst_dev(dst)->gso_max_size) : ++ READ_ONCE(dst_dev(dst)->gso_ipv4_max_size); + if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk)) + max_size = GSO_LEGACY_MAX_SIZE; + +@@ -2546,7 +2546,7 @@ void sk_setup_caps(struct sock *sk, stru + { + u32 max_segs = 1; + +- sk->sk_route_caps = dst->dev->features; ++ sk->sk_route_caps = dst_dev(dst)->features; + if (sk_is_tcp(sk)) { + struct inet_connection_sock *icsk = inet_csk(sk); + +@@ -2564,7 +2564,7 @@ void sk_setup_caps(struct sock *sk, stru + sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; + sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst); + /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */ +- max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1); ++ max_segs = max_t(u32, READ_ONCE(dst_dev(dst)->gso_max_segs), 1); + } + } + sk->sk_gso_max_segs = max_segs; diff --git a/queue-6.12/nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch b/queue-6.12/nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch new file mode 100644 index 0000000000..0ed4907726 --- /dev/null +++ b/queue-6.12/nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch @@ -0,0 +1,49 @@ +From stable+bounces-188247-greg=kroah.com@vger.kernel.org Mon Oct 20 22:33:56 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 16:33:43 -0400 +Subject: NFSD: Define a proc_layoutcommit for the FlexFiles layout type +To: stable@vger.kernel.org +Cc: Chuck Lever , Robert Morris , Thomas Haynes , Sasha Levin +Message-ID: <20251020203343.1907954-5-sashal@kernel.org> + +From: Chuck Lever + +[ Upstream commit 4b47a8601b71ad98833b447d465592d847b4dc77 ] + +Avoid a crash if a pNFS client should happen to send a LAYOUTCOMMIT +operation on a FlexFiles layout. + +Reported-by: Robert Morris +Closes: https://lore.kernel.org/linux-nfs/152f99b2-ba35-4dec-93a9-4690e625dccd@oracle.com/T/#t +Cc: Thomas Haynes +Cc: stable@vger.kernel.org +Fixes: 9b9960a0ca47 ("nfsd: Add a super simple flex file server") +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/flexfilelayout.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/fs/nfsd/flexfilelayout.c ++++ b/fs/nfsd/flexfilelayout.c +@@ -125,6 +125,13 @@ nfsd4_ff_proc_getdeviceinfo(struct super + return 0; + } + ++static __be32 ++nfsd4_ff_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp, ++ struct nfsd4_layoutcommit *lcp) ++{ ++ return nfs_ok; ++} ++ + const struct nfsd4_layout_ops ff_layout_ops = { + .notify_types = + NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, +@@ -133,4 +140,5 @@ const struct nfsd4_layout_ops ff_layout_ + .encode_getdeviceinfo = nfsd4_ff_encode_getdeviceinfo, + .proc_layoutget = nfsd4_ff_proc_layoutget, + .encode_layoutget = nfsd4_ff_encode_layoutget, ++ .proc_layoutcommit = nfsd4_ff_proc_layoutcommit, + }; diff --git a/queue-6.12/nfsd-drop-dprintk-in-blocklayout-xdr-functions.patch b/queue-6.12/nfsd-drop-dprintk-in-blocklayout-xdr-functions.patch new file mode 100644 index 0000000000..d17f52b072 --- /dev/null +++ b/queue-6.12/nfsd-drop-dprintk-in-blocklayout-xdr-functions.patch @@ -0,0 +1,129 @@ +From stable+bounces-188066-greg=kroah.com@vger.kernel.org Mon Oct 20 14:53:20 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:53:00 -0400 +Subject: nfsd: Drop dprintk in blocklayout xdr functions +To: stable@vger.kernel.org +Cc: Sergey Bashirov , Jeff Layton , Christoph Hellwig , Chuck Lever , Sasha Levin +Message-ID: <20251020125305.1760219-2-sashal@kernel.org> + +From: Sergey Bashirov + +[ Upstream commit 
e339967eecf1305557f7c697e1bc10b5cc495454 ] + +Minor clean up. Instead of dprintk there are appropriate error codes. + +Signed-off-by: Sergey Bashirov +Reviewed-by: Jeff Layton +Reviewed-by: Christoph Hellwig +Signed-off-by: Chuck Lever +Stable-dep-of: d68886bae76a ("NFSD: Fix last write offset handling in layoutcommit") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/blocklayoutxdr.c | 40 +++++++--------------------------------- + 1 file changed, 7 insertions(+), 33 deletions(-) + +--- a/fs/nfsd/blocklayoutxdr.c ++++ b/fs/nfsd/blocklayoutxdr.c +@@ -139,28 +139,19 @@ nfsd4_block_decode_layoutupdate(__be32 * + struct iomap *iomaps; + u32 nr_iomaps, i; + +- if (len < sizeof(u32)) { +- dprintk("%s: extent array too small: %u\n", __func__, len); ++ if (len < sizeof(u32)) + return nfserr_bad_xdr; +- } + len -= sizeof(u32); +- if (len % PNFS_BLOCK_EXTENT_SIZE) { +- dprintk("%s: extent array invalid: %u\n", __func__, len); ++ if (len % PNFS_BLOCK_EXTENT_SIZE) + return nfserr_bad_xdr; +- } + + nr_iomaps = be32_to_cpup(p++); +- if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) { +- dprintk("%s: extent array size mismatch: %u/%u\n", +- __func__, len, nr_iomaps); ++ if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) + return nfserr_bad_xdr; +- } + + iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL); +- if (!iomaps) { +- dprintk("%s: failed to allocate extent array\n", __func__); ++ if (!iomaps) + return nfserr_delay; +- } + + for (i = 0; i < nr_iomaps; i++) { + struct pnfs_block_extent bex; +@@ -170,26 +161,18 @@ nfsd4_block_decode_layoutupdate(__be32 * + + p = xdr_decode_hyper(p, &bex.foff); + if (bex.foff & (block_size - 1)) { +- dprintk("%s: unaligned offset 0x%llx\n", +- __func__, bex.foff); + goto fail; + } + p = xdr_decode_hyper(p, &bex.len); + if (bex.len & (block_size - 1)) { +- dprintk("%s: unaligned length 0x%llx\n", +- __func__, bex.foff); + goto fail; + } + p = xdr_decode_hyper(p, &bex.soff); + if (bex.soff & (block_size - 1)) { +- dprintk("%s: unaligned disk offset 0x%llx\n", +- __func__, bex.soff); + goto fail; + } + bex.es = be32_to_cpup(p++); + if (bex.es != PNFS_BLOCK_READWRITE_DATA) { +- dprintk("%s: incorrect extent state %d\n", +- __func__, bex.es); + goto fail; + } + +@@ -231,38 +214,29 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p + struct iomap *iomaps; + u32 nr_iomaps, expected, i; + +- if (len < sizeof(u32)) { +- dprintk("%s: extent array too small: %u\n", __func__, len); ++ if (len < sizeof(u32)) + return nfserr_bad_xdr; +- } + + nr_iomaps = be32_to_cpup(p++); + expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE; +- if (len != expected) { +- dprintk("%s: extent array size mismatch: %u/%u\n", +- __func__, len, expected); ++ if (len != expected) + return nfserr_bad_xdr; +- } + + iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL); +- if (!iomaps) { +- dprintk("%s: failed to allocate extent array\n", __func__); ++ if (!iomaps) + return nfserr_delay; +- } + + for (i = 0; i < nr_iomaps; i++) { + u64 val; + + p = xdr_decode_hyper(p, &val); + if (val & (block_size - 1)) { +- dprintk("%s: unaligned offset 0x%llx\n", __func__, val); + goto fail; + } + iomaps[i].offset = val; + + p = xdr_decode_hyper(p, &val); + if (val & (block_size - 1)) { +- dprintk("%s: unaligned length 0x%llx\n", __func__, val); + goto fail; + } + iomaps[i].length = val; diff --git a/queue-6.12/nfsd-fix-last-write-offset-handling-in-layoutcommit.patch b/queue-6.12/nfsd-fix-last-write-offset-handling-in-layoutcommit.patch new file mode 100644 index 0000000000..83dcc5cc36 
--- /dev/null +++ b/queue-6.12/nfsd-fix-last-write-offset-handling-in-layoutcommit.patch @@ -0,0 +1,113 @@ +From stable+bounces-188070-greg=kroah.com@vger.kernel.org Mon Oct 20 14:53:46 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:53:04 -0400 +Subject: NFSD: Fix last write offset handling in layoutcommit +To: stable@vger.kernel.org +Cc: Sergey Bashirov , Konstantin Evtushenko , Christoph Hellwig , Jeff Layton , Chuck Lever , Sasha Levin +Message-ID: <20251020125305.1760219-6-sashal@kernel.org> + +From: Sergey Bashirov + +[ Upstream commit d68886bae76a4b9b3484d23e5b7df086f940fa38 ] + +The data type of loca_last_write_offset is newoffset4 and is switched +on a boolean value, no_newoffset, that indicates if a previous write +occurred or not. If no_newoffset is FALSE, an offset is not given. +This means that client does not try to update the file size. Thus, +server should not try to calculate new file size and check if it fits +into the segment range. See RFC 8881, section 12.5.4.2. + +Sometimes the current incorrect logic may cause clients to hang when +trying to sync an inode. If layoutcommit fails, the client marks the +inode as dirty again. + +Fixes: 9cf514ccfacb ("nfsd: implement pNFS operations") +Cc: stable@vger.kernel.org +Co-developed-by: Konstantin Evtushenko +Signed-off-by: Konstantin Evtushenko +Signed-off-by: Sergey Bashirov +Reviewed-by: Christoph Hellwig +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/blocklayout.c | 5 ++--- + fs/nfsd/nfs4proc.c | 30 +++++++++++++++--------------- + 2 files changed, 17 insertions(+), 18 deletions(-) + +--- a/fs/nfsd/blocklayout.c ++++ b/fs/nfsd/blocklayout.c +@@ -118,7 +118,6 @@ nfsd4_block_commit_blocks(struct inode * + struct iomap *iomaps, int nr_iomaps) + { + struct timespec64 mtime = inode_get_mtime(inode); +- loff_t new_size = lcp->lc_last_wr + 1; + struct iattr iattr = { .ia_valid = 0 }; + int error; + +@@ -128,9 +127,9 @@ nfsd4_block_commit_blocks(struct inode * + iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME; + iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime; + +- if (new_size > i_size_read(inode)) { ++ if (lcp->lc_size_chg) { + iattr.ia_valid |= ATTR_SIZE; +- iattr.ia_size = new_size; ++ iattr.ia_size = lcp->lc_newsize; + } + + error = inode->i_sb->s_export_op->commit_blocks(inode, iomaps, +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -2362,7 +2362,6 @@ nfsd4_layoutcommit(struct svc_rqst *rqst + const struct nfsd4_layout_seg *seg = &lcp->lc_seg; + struct svc_fh *current_fh = &cstate->current_fh; + const struct nfsd4_layout_ops *ops; +- loff_t new_size = lcp->lc_last_wr + 1; + struct inode *inode; + struct nfs4_layout_stateid *ls; + __be32 nfserr; +@@ -2378,13 +2377,21 @@ nfsd4_layoutcommit(struct svc_rqst *rqst + goto out; + inode = d_inode(current_fh->fh_dentry); + +- nfserr = nfserr_inval; +- if (new_size <= seg->offset) +- goto out; +- if (new_size > seg->offset + seg->length) +- goto out; +- if (!lcp->lc_newoffset && new_size > i_size_read(inode)) +- goto out; ++ lcp->lc_size_chg = false; ++ if (lcp->lc_newoffset) { ++ loff_t new_size = lcp->lc_last_wr + 1; ++ ++ nfserr = nfserr_inval; ++ if (new_size <= seg->offset) ++ goto out; ++ if (new_size > seg->offset + seg->length) ++ goto out; ++ ++ if (new_size > i_size_read(inode)) { ++ lcp->lc_size_chg = true; ++ lcp->lc_newsize = new_size; ++ } ++ } + + nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid, + false, lcp->lc_layout_type, +@@ 
-2400,13 +2407,6 @@ nfsd4_layoutcommit(struct svc_rqst *rqst + /* LAYOUTCOMMIT does not require any serialization */ + mutex_unlock(&ls->ls_mutex); + +- if (new_size > i_size_read(inode)) { +- lcp->lc_size_chg = true; +- lcp->lc_newsize = new_size; +- } else { +- lcp->lc_size_chg = false; +- } +- + nfserr = ops->proc_layoutcommit(inode, rqstp, lcp); + nfs4_put_stid(&ls->ls_stid); + out: diff --git a/queue-6.12/nfsd-implement-large-extent-array-support-in-pnfs.patch b/queue-6.12/nfsd-implement-large-extent-array-support-in-pnfs.patch new file mode 100644 index 0000000000..6548752b5d --- /dev/null +++ b/queue-6.12/nfsd-implement-large-extent-array-support-in-pnfs.patch @@ -0,0 +1,335 @@ +From stable+bounces-188069-greg=kroah.com@vger.kernel.org Mon Oct 20 14:53:26 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:53:03 -0400 +Subject: NFSD: Implement large extent array support in pNFS +To: stable@vger.kernel.org +Cc: Sergey Bashirov , Konstantin Evtushenko , Jeff Layton , Christoph Hellwig , Chuck Lever , Sasha Levin +Message-ID: <20251020125305.1760219-5-sashal@kernel.org> + +From: Sergey Bashirov + +[ Upstream commit f963cf2b91a30b5614c514f3ad53ca124cb65280 ] + +When pNFS client in the block or scsi layout mode sends layoutcommit +to MDS, a variable length array of modified extents is supplied within +the request. This patch allows the server to accept such extent arrays +if they do not fit within single memory page. + +The issue can be reproduced when writing to a 1GB file using FIO with +O_DIRECT, 4K block and large I/O depth without preallocation of the +file. In this case, the server returns NFSERR_BADXDR to the client. + +Co-developed-by: Konstantin Evtushenko +Signed-off-by: Konstantin Evtushenko +Signed-off-by: Sergey Bashirov +Reviewed-by: Jeff Layton +Reviewed-by: Christoph Hellwig +Signed-off-by: Chuck Lever +Stable-dep-of: d68886bae76a ("NFSD: Fix last write offset handling in layoutcommit") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/blocklayout.c | 20 ++++++----- + fs/nfsd/blocklayoutxdr.c | 83 +++++++++++++++++++++++++++++++---------------- + fs/nfsd/blocklayoutxdr.h | 4 +- + fs/nfsd/nfs4proc.c | 2 - + fs/nfsd/nfs4xdr.c | 11 ++---- + fs/nfsd/pnfs.h | 1 + fs/nfsd/xdr4.h | 3 - + 7 files changed, 78 insertions(+), 46 deletions(-) + +--- a/fs/nfsd/blocklayout.c ++++ b/fs/nfsd/blocklayout.c +@@ -173,16 +173,18 @@ nfsd4_block_proc_getdeviceinfo(struct su + } + + static __be32 +-nfsd4_block_proc_layoutcommit(struct inode *inode, ++nfsd4_block_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp, + struct nfsd4_layoutcommit *lcp) + { + struct iomap *iomaps; + int nr_iomaps; + __be32 nfserr; + +- nfserr = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, +- lcp->lc_up_len, &iomaps, &nr_iomaps, +- i_blocksize(inode)); ++ rqstp->rq_arg = lcp->lc_up_layout; ++ svcxdr_init_decode(rqstp); ++ ++ nfserr = nfsd4_block_decode_layoutupdate(&rqstp->rq_arg_stream, ++ &iomaps, &nr_iomaps, i_blocksize(inode)); + if (nfserr != nfs_ok) + return nfserr; + +@@ -313,16 +315,18 @@ nfsd4_scsi_proc_getdeviceinfo(struct sup + return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp)); + } + static __be32 +-nfsd4_scsi_proc_layoutcommit(struct inode *inode, ++nfsd4_scsi_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp, + struct nfsd4_layoutcommit *lcp) + { + struct iomap *iomaps; + int nr_iomaps; + __be32 nfserr; + +- nfserr = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout, +- lcp->lc_up_len, &iomaps, &nr_iomaps, +- i_blocksize(inode)); ++ 
rqstp->rq_arg = lcp->lc_up_layout; ++ svcxdr_init_decode(rqstp); ++ ++ nfserr = nfsd4_scsi_decode_layoutupdate(&rqstp->rq_arg_stream, ++ &iomaps, &nr_iomaps, i_blocksize(inode)); + if (nfserr != nfs_ok) + return nfserr; + +--- a/fs/nfsd/blocklayoutxdr.c ++++ b/fs/nfsd/blocklayoutxdr.c +@@ -113,8 +113,7 @@ nfsd4_block_encode_getdeviceinfo(struct + + /** + * nfsd4_block_decode_layoutupdate - decode the block layout extent array +- * @p: pointer to the xdr data +- * @len: number of bytes to decode ++ * @xdr: subbuf set to the encoded array + * @iomapp: pointer to store the decoded extent array + * @nr_iomapsp: pointer to store the number of extents + * @block_size: alignment of extent offset and length +@@ -127,25 +126,24 @@ nfsd4_block_encode_getdeviceinfo(struct + * + * Return values: + * %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid +- * %nfserr_bad_xdr: The encoded array in @p is invalid ++ * %nfserr_bad_xdr: The encoded array in @xdr is invalid + * %nfserr_inval: An unaligned extent found + * %nfserr_delay: Failed to allocate memory for @iomapp + */ + __be32 +-nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, ++nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp, + int *nr_iomapsp, u32 block_size) + { + struct iomap *iomaps; +- u32 nr_iomaps, i; ++ u32 nr_iomaps, expected, len, i; ++ __be32 nfserr; + +- if (len < sizeof(u32)) +- return nfserr_bad_xdr; +- len -= sizeof(u32); +- if (len % PNFS_BLOCK_EXTENT_SIZE) ++ if (xdr_stream_decode_u32(xdr, &nr_iomaps)) + return nfserr_bad_xdr; + +- nr_iomaps = be32_to_cpup(p++); +- if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) ++ len = sizeof(__be32) + xdr_stream_remaining(xdr); ++ expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE; ++ if (len != expected) + return nfserr_bad_xdr; + + iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL); +@@ -155,21 +153,44 @@ nfsd4_block_decode_layoutupdate(__be32 * + for (i = 0; i < nr_iomaps; i++) { + struct pnfs_block_extent bex; + +- p = svcxdr_decode_deviceid4(p, &bex.vol_id); +- p = xdr_decode_hyper(p, &bex.foff); ++ if (nfsd4_decode_deviceid4(xdr, &bex.vol_id)) { ++ nfserr = nfserr_bad_xdr; ++ goto fail; ++ } ++ ++ if (xdr_stream_decode_u64(xdr, &bex.foff)) { ++ nfserr = nfserr_bad_xdr; ++ goto fail; ++ } + if (bex.foff & (block_size - 1)) { ++ nfserr = nfserr_inval; ++ goto fail; ++ } ++ ++ if (xdr_stream_decode_u64(xdr, &bex.len)) { ++ nfserr = nfserr_bad_xdr; + goto fail; + } +- p = xdr_decode_hyper(p, &bex.len); + if (bex.len & (block_size - 1)) { ++ nfserr = nfserr_inval; ++ goto fail; ++ } ++ ++ if (xdr_stream_decode_u64(xdr, &bex.soff)) { ++ nfserr = nfserr_bad_xdr; + goto fail; + } +- p = xdr_decode_hyper(p, &bex.soff); + if (bex.soff & (block_size - 1)) { ++ nfserr = nfserr_inval; ++ goto fail; ++ } ++ ++ if (xdr_stream_decode_u32(xdr, &bex.es)) { ++ nfserr = nfserr_bad_xdr; + goto fail; + } +- bex.es = be32_to_cpup(p++); + if (bex.es != PNFS_BLOCK_READWRITE_DATA) { ++ nfserr = nfserr_inval; + goto fail; + } + +@@ -182,13 +203,12 @@ nfsd4_block_decode_layoutupdate(__be32 * + return nfs_ok; + fail: + kfree(iomaps); +- return nfserr_inval; ++ return nfserr; + } + + /** + * nfsd4_scsi_decode_layoutupdate - decode the scsi layout extent array +- * @p: pointer to the xdr data +- * @len: number of bytes to decode ++ * @xdr: subbuf set to the encoded array + * @iomapp: pointer to store the decoded extent array + * @nr_iomapsp: pointer to store the number of extents + * @block_size: alignment of extent offset and length +@@ 
-200,21 +220,22 @@ fail: + * + * Return values: + * %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid +- * %nfserr_bad_xdr: The encoded array in @p is invalid ++ * %nfserr_bad_xdr: The encoded array in @xdr is invalid + * %nfserr_inval: An unaligned extent found + * %nfserr_delay: Failed to allocate memory for @iomapp + */ + __be32 +-nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, ++nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp, + int *nr_iomapsp, u32 block_size) + { + struct iomap *iomaps; +- u32 nr_iomaps, expected, i; ++ u32 nr_iomaps, expected, len, i; ++ __be32 nfserr; + +- if (len < sizeof(u32)) ++ if (xdr_stream_decode_u32(xdr, &nr_iomaps)) + return nfserr_bad_xdr; + +- nr_iomaps = be32_to_cpup(p++); ++ len = sizeof(__be32) + xdr_stream_remaining(xdr); + expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE; + if (len != expected) + return nfserr_bad_xdr; +@@ -226,14 +247,22 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p + for (i = 0; i < nr_iomaps; i++) { + u64 val; + +- p = xdr_decode_hyper(p, &val); ++ if (xdr_stream_decode_u64(xdr, &val)) { ++ nfserr = nfserr_bad_xdr; ++ goto fail; ++ } + if (val & (block_size - 1)) { ++ nfserr = nfserr_inval; + goto fail; + } + iomaps[i].offset = val; + +- p = xdr_decode_hyper(p, &val); ++ if (xdr_stream_decode_u64(xdr, &val)) { ++ nfserr = nfserr_bad_xdr; ++ goto fail; ++ } + if (val & (block_size - 1)) { ++ nfserr = nfserr_inval; + goto fail; + } + iomaps[i].length = val; +@@ -244,5 +273,5 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p + return nfs_ok; + fail: + kfree(iomaps); +- return nfserr_inval; ++ return nfserr; + } +--- a/fs/nfsd/blocklayoutxdr.h ++++ b/fs/nfsd/blocklayoutxdr.h +@@ -54,9 +54,9 @@ __be32 nfsd4_block_encode_getdeviceinfo( + const struct nfsd4_getdeviceinfo *gdp); + __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr, + const struct nfsd4_layoutget *lgp); +-__be32 nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, ++__be32 nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr, + struct iomap **iomapp, int *nr_iomapsp, u32 block_size); +-__be32 nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, ++__be32 nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr, + struct iomap **iomapp, int *nr_iomapsp, u32 block_size); + + #endif /* _NFSD_BLOCKLAYOUTXDR_H */ +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -2407,7 +2407,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqst + lcp->lc_size_chg = false; + } + +- nfserr = ops->proc_layoutcommit(inode, lcp); ++ nfserr = ops->proc_layoutcommit(inode, rqstp, lcp); + nfs4_put_stid(&ls->ls_stid); + out: + return nfserr; +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -571,6 +571,8 @@ static __be32 + nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp, + struct nfsd4_layoutcommit *lcp) + { ++ u32 len; ++ + if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_layout_type) < 0) + return nfserr_bad_xdr; + if (lcp->lc_layout_type < LAYOUT_NFSV4_1_FILES) +@@ -578,13 +580,10 @@ nfsd4_decode_layoutupdate4(struct nfsd4_ + if (lcp->lc_layout_type >= LAYOUT_TYPE_MAX) + return nfserr_bad_xdr; + +- if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_up_len) < 0) ++ if (xdr_stream_decode_u32(argp->xdr, &len) < 0) ++ return nfserr_bad_xdr; ++ if (!xdr_stream_subsegment(argp->xdr, &lcp->lc_up_layout, len)) + return nfserr_bad_xdr; +- if (lcp->lc_up_len > 0) { +- lcp->lc_up_layout = xdr_inline_decode(argp->xdr, lcp->lc_up_len); +- if (!lcp->lc_up_layout) +- return nfserr_bad_xdr; +- } + + return nfs_ok; + } +--- 
a/fs/nfsd/pnfs.h ++++ b/fs/nfsd/pnfs.h +@@ -35,6 +35,7 @@ struct nfsd4_layout_ops { + const struct nfsd4_layoutget *lgp); + + __be32 (*proc_layoutcommit)(struct inode *inode, ++ struct svc_rqst *rqstp, + struct nfsd4_layoutcommit *lcp); + + void (*fence_client)(struct nfs4_layout_stateid *ls, +--- a/fs/nfsd/xdr4.h ++++ b/fs/nfsd/xdr4.h +@@ -665,8 +665,7 @@ struct nfsd4_layoutcommit { + u64 lc_last_wr; /* request */ + struct timespec64 lc_mtime; /* request */ + u32 lc_layout_type; /* request */ +- u32 lc_up_len; /* layout length */ +- void *lc_up_layout; /* decoded by callback */ ++ struct xdr_buf lc_up_layout; /* decoded by callback */ + bool lc_size_chg; /* response */ + u64 lc_newsize; /* response */ + }; diff --git a/queue-6.12/nfsd-minor-cleanup-in-layoutcommit-processing.patch b/queue-6.12/nfsd-minor-cleanup-in-layoutcommit-processing.patch new file mode 100644 index 0000000000..a56be05a40 --- /dev/null +++ b/queue-6.12/nfsd-minor-cleanup-in-layoutcommit-processing.patch @@ -0,0 +1,50 @@ +From stable+bounces-188068-greg=kroah.com@vger.kernel.org Mon Oct 20 14:53:43 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:53:02 -0400 +Subject: NFSD: Minor cleanup in layoutcommit processing +To: stable@vger.kernel.org +Cc: Sergey Bashirov , Christoph Hellwig , Chuck Lever , Sasha Levin +Message-ID: <20251020125305.1760219-4-sashal@kernel.org> + +From: Sergey Bashirov + +[ Upstream commit 274365a51d88658fb51cca637ba579034e90a799 ] + +Remove dprintk in nfsd4_layoutcommit. These are not needed +in day to day usage, and the information is also available +in Wireshark when capturing NFS traffic. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Sergey Bashirov +Signed-off-by: Chuck Lever +Stable-dep-of: d68886bae76a ("NFSD: Fix last write offset handling in layoutcommit") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfs4proc.c | 12 +++--------- + 1 file changed, 3 insertions(+), 9 deletions(-) + +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -2379,18 +2379,12 @@ nfsd4_layoutcommit(struct svc_rqst *rqst + inode = d_inode(current_fh->fh_dentry); + + nfserr = nfserr_inval; +- if (new_size <= seg->offset) { +- dprintk("pnfsd: last write before layout segment\n"); ++ if (new_size <= seg->offset) + goto out; +- } +- if (new_size > seg->offset + seg->length) { +- dprintk("pnfsd: last write beyond layout segment\n"); ++ if (new_size > seg->offset + seg->length) + goto out; +- } +- if (!lcp->lc_newoffset && new_size > i_size_read(inode)) { +- dprintk("pnfsd: layoutcommit beyond EOF\n"); ++ if (!lcp->lc_newoffset && new_size > i_size_read(inode)) + goto out; +- } + + nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid, + false, lcp->lc_layout_type, diff --git a/queue-6.12/nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch b/queue-6.12/nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch new file mode 100644 index 0000000000..21818b58c9 --- /dev/null +++ b/queue-6.12/nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch @@ -0,0 +1,156 @@ +From stable+bounces-188067-greg=kroah.com@vger.kernel.org Mon Oct 20 14:53:26 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:53:01 -0400 +Subject: NFSD: Rework encoding and decoding of nfsd4_deviceid +To: stable@vger.kernel.org +Cc: Sergey Bashirov , Chuck Lever , Sasha Levin +Message-ID: <20251020125305.1760219-3-sashal@kernel.org> + +From: Sergey Bashirov + +[ Upstream commit 832738e4b325b742940761e10487403f9aad13e8 ] + +Compilers may optimize the layout of C structures, so we should not rely 
+on sizeof struct and memcpy to encode and decode XDR structures. The byte +order of the fields should also be taken into account. + +This patch adds the correct functions to handle the deviceid4 structure +and removes the pad field, which is currently not used by NFSD, from the +runtime state. The server's byte order is preserved because the deviceid4 +blob on the wire is only used as a cookie by the client. + +Signed-off-by: Sergey Bashirov +Signed-off-by: Chuck Lever +Stable-dep-of: d68886bae76a ("NFSD: Fix last write offset handling in layoutcommit") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/blocklayoutxdr.c | 7 ++----- + fs/nfsd/flexfilelayoutxdr.c | 3 +-- + fs/nfsd/nfs4layouts.c | 1 - + fs/nfsd/nfs4xdr.c | 14 +------------- + fs/nfsd/xdr4.h | 36 +++++++++++++++++++++++++++++++++++- + 5 files changed, 39 insertions(+), 22 deletions(-) + +--- a/fs/nfsd/blocklayoutxdr.c ++++ b/fs/nfsd/blocklayoutxdr.c +@@ -29,8 +29,7 @@ nfsd4_block_encode_layoutget(struct xdr_ + *p++ = cpu_to_be32(len); + *p++ = cpu_to_be32(1); /* we always return a single extent */ + +- p = xdr_encode_opaque_fixed(p, &b->vol_id, +- sizeof(struct nfsd4_deviceid)); ++ p = svcxdr_encode_deviceid4(p, &b->vol_id); + p = xdr_encode_hyper(p, b->foff); + p = xdr_encode_hyper(p, b->len); + p = xdr_encode_hyper(p, b->soff); +@@ -156,9 +155,7 @@ nfsd4_block_decode_layoutupdate(__be32 * + for (i = 0; i < nr_iomaps; i++) { + struct pnfs_block_extent bex; + +- memcpy(&bex.vol_id, p, sizeof(struct nfsd4_deviceid)); +- p += XDR_QUADLEN(sizeof(struct nfsd4_deviceid)); +- ++ p = svcxdr_decode_deviceid4(p, &bex.vol_id); + p = xdr_decode_hyper(p, &bex.foff); + if (bex.foff & (block_size - 1)) { + goto fail; +--- a/fs/nfsd/flexfilelayoutxdr.c ++++ b/fs/nfsd/flexfilelayoutxdr.c +@@ -54,8 +54,7 @@ nfsd4_ff_encode_layoutget(struct xdr_str + *p++ = cpu_to_be32(1); /* single mirror */ + *p++ = cpu_to_be32(1); /* single data server */ + +- p = xdr_encode_opaque_fixed(p, &fl->deviceid, +- sizeof(struct nfsd4_deviceid)); ++ p = svcxdr_encode_deviceid4(p, &fl->deviceid); + + *p++ = cpu_to_be32(1); /* efficiency */ + +--- a/fs/nfsd/nfs4layouts.c ++++ b/fs/nfsd/nfs4layouts.c +@@ -120,7 +120,6 @@ nfsd4_set_deviceid(struct nfsd4_deviceid + + id->fsid_idx = fhp->fh_export->ex_devid_map->idx; + id->generation = device_generation; +- id->pad = 0; + return 0; + } + +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -566,18 +566,6 @@ nfsd4_decode_state_owner4(struct nfsd4_c + } + + #ifdef CONFIG_NFSD_PNFS +-static __be32 +-nfsd4_decode_deviceid4(struct nfsd4_compoundargs *argp, +- struct nfsd4_deviceid *devid) +-{ +- __be32 *p; +- +- p = xdr_inline_decode(argp->xdr, NFS4_DEVICEID4_SIZE); +- if (!p) +- return nfserr_bad_xdr; +- memcpy(devid, p, sizeof(*devid)); +- return nfs_ok; +-} + + static __be32 + nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp, +@@ -1762,7 +1750,7 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_ + __be32 status; + + memset(gdev, 0, sizeof(*gdev)); +- status = nfsd4_decode_deviceid4(argp, &gdev->gd_devid); ++ status = nfsd4_decode_deviceid4(argp->xdr, &gdev->gd_devid); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_layout_type) < 0) +--- a/fs/nfsd/xdr4.h ++++ b/fs/nfsd/xdr4.h +@@ -596,9 +596,43 @@ struct nfsd4_reclaim_complete { + struct nfsd4_deviceid { + u64 fsid_idx; + u32 generation; +- u32 pad; + }; + ++static inline __be32 * ++svcxdr_encode_deviceid4(__be32 *p, const struct nfsd4_deviceid *devid) ++{ ++ __be64 *q = (__be64 *)p; ++ ++ *q = (__force 
__be64)devid->fsid_idx; ++ p += 2; ++ *p++ = (__force __be32)devid->generation; ++ *p++ = xdr_zero; ++ return p; ++} ++ ++static inline __be32 * ++svcxdr_decode_deviceid4(__be32 *p, struct nfsd4_deviceid *devid) ++{ ++ __be64 *q = (__be64 *)p; ++ ++ devid->fsid_idx = (__force u64)(*q); ++ p += 2; ++ devid->generation = (__force u32)(*p++); ++ p++; /* NFSD does not use the remaining octets */ ++ return p; ++} ++ ++static inline __be32 ++nfsd4_decode_deviceid4(struct xdr_stream *xdr, struct nfsd4_deviceid *devid) ++{ ++ __be32 *p = xdr_inline_decode(xdr, NFS4_DEVICEID4_SIZE); ++ ++ if (unlikely(!p)) ++ return nfserr_bad_xdr; ++ svcxdr_decode_deviceid4(p, devid); ++ return nfs_ok; ++} ++ + struct nfsd4_layout_seg { + u32 iomode; + u64 offset; diff --git a/queue-6.12/nfsd-use-correct-error-code-when-decoding-extents.patch b/queue-6.12/nfsd-use-correct-error-code-when-decoding-extents.patch new file mode 100644 index 0000000000..85e40e54fd --- /dev/null +++ b/queue-6.12/nfsd-use-correct-error-code-when-decoding-extents.patch @@ -0,0 +1,234 @@ +From stable+bounces-188065-greg=kroah.com@vger.kernel.org Mon Oct 20 14:53:24 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:52:59 -0400 +Subject: nfsd: Use correct error code when decoding extents +To: stable@vger.kernel.org +Cc: Sergey Bashirov , Christoph Hellwig , Chuck Lever , Sasha Levin +Message-ID: <20251020125305.1760219-1-sashal@kernel.org> + +From: Sergey Bashirov + +[ Upstream commit 26d05e1c37d276905bc921384b5a75158fca284b ] + +Update error codes in the decoding functions of the block and scsi +layout drivers to match the core nfsd code. NFS4ERR_INVAL means that the +server was able to decode the request, but the decoded values are +invalid. Use NFS4ERR_BADXDR instead to indicate a decoding error. +ENOMEM is likewise mapped to the NFS status code NFS4ERR_DELAY.
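+
+For illustration, these are the wire-format status codes involved (all
+from fs/nfsd/nfsd.h; nfserr_delay is the one this patch adds), which the
+decoders now return directly instead of negative errno values:
+
+	nfserr_bad_xdr	/* NFS4ERR_BADXDR: request could not be decoded */
+	nfserr_inval	/* NFS4ERR_INVAL: decoded, but values are invalid */
+	nfserr_delay	/* NFS4ERR_DELAY: transient failure, e.g. ENOMEM */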
+ +Signed-off-by: Sergey Bashirov +Reviewed-by: Christoph Hellwig +Signed-off-by: Chuck Lever +Stable-dep-of: d68886bae76a ("NFSD: Fix last write offset handling in layoutcommit") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/blocklayout.c | 20 +++++++------ + fs/nfsd/blocklayoutxdr.c | 71 +++++++++++++++++++++++++++++++++++++---------- + fs/nfsd/blocklayoutxdr.h | 8 ++--- + fs/nfsd/nfsd.h | 1 + 4 files changed, 73 insertions(+), 27 deletions(-) + +--- a/fs/nfsd/blocklayout.c ++++ b/fs/nfsd/blocklayout.c +@@ -178,11 +178,13 @@ nfsd4_block_proc_layoutcommit(struct ino + { + struct iomap *iomaps; + int nr_iomaps; ++ __be32 nfserr; + +- nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, +- lcp->lc_up_len, &iomaps, i_blocksize(inode)); +- if (nr_iomaps < 0) +- return nfserrno(nr_iomaps); ++ nfserr = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, ++ lcp->lc_up_len, &iomaps, &nr_iomaps, ++ i_blocksize(inode)); ++ if (nfserr != nfs_ok) ++ return nfserr; + + return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps); + } +@@ -316,11 +318,13 @@ nfsd4_scsi_proc_layoutcommit(struct inod + { + struct iomap *iomaps; + int nr_iomaps; ++ __be32 nfserr; + +- nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout, +- lcp->lc_up_len, &iomaps, i_blocksize(inode)); +- if (nr_iomaps < 0) +- return nfserrno(nr_iomaps); ++ nfserr = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout, ++ lcp->lc_up_len, &iomaps, &nr_iomaps, ++ i_blocksize(inode)); ++ if (nfserr != nfs_ok) ++ return nfserr; + + return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps); + } +--- a/fs/nfsd/blocklayoutxdr.c ++++ b/fs/nfsd/blocklayoutxdr.c +@@ -112,34 +112,54 @@ nfsd4_block_encode_getdeviceinfo(struct + return 0; + } + +-int ++/** ++ * nfsd4_block_decode_layoutupdate - decode the block layout extent array ++ * @p: pointer to the xdr data ++ * @len: number of bytes to decode ++ * @iomapp: pointer to store the decoded extent array ++ * @nr_iomapsp: pointer to store the number of extents ++ * @block_size: alignment of extent offset and length ++ * ++ * This function decodes the opaque field of the layoutupdate4 structure ++ * in a layoutcommit request for the block layout driver. The field is ++ * actually an array of extents sent by the client. It also checks that ++ * the file offset, storage offset and length of each extent are aligned ++ * by @block_size. 
++ * ++ * Return values: ++ * %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid ++ * %nfserr_bad_xdr: The encoded array in @p is invalid ++ * %nfserr_inval: An unaligned extent found ++ * %nfserr_delay: Failed to allocate memory for @iomapp ++ */ ++__be32 + nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, +- u32 block_size) ++ int *nr_iomapsp, u32 block_size) + { + struct iomap *iomaps; + u32 nr_iomaps, i; + + if (len < sizeof(u32)) { + dprintk("%s: extent array too small: %u\n", __func__, len); +- return -EINVAL; ++ return nfserr_bad_xdr; + } + len -= sizeof(u32); + if (len % PNFS_BLOCK_EXTENT_SIZE) { + dprintk("%s: extent array invalid: %u\n", __func__, len); +- return -EINVAL; ++ return nfserr_bad_xdr; + } + + nr_iomaps = be32_to_cpup(p++); + if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) { + dprintk("%s: extent array size mismatch: %u/%u\n", + __func__, len, nr_iomaps); +- return -EINVAL; ++ return nfserr_bad_xdr; + } + + iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL); + if (!iomaps) { + dprintk("%s: failed to allocate extent array\n", __func__); +- return -ENOMEM; ++ return nfserr_delay; + } + + for (i = 0; i < nr_iomaps; i++) { +@@ -178,22 +198,42 @@ nfsd4_block_decode_layoutupdate(__be32 * + } + + *iomapp = iomaps; +- return nr_iomaps; ++ *nr_iomapsp = nr_iomaps; ++ return nfs_ok; + fail: + kfree(iomaps); +- return -EINVAL; ++ return nfserr_inval; + } + +-int ++/** ++ * nfsd4_scsi_decode_layoutupdate - decode the scsi layout extent array ++ * @p: pointer to the xdr data ++ * @len: number of bytes to decode ++ * @iomapp: pointer to store the decoded extent array ++ * @nr_iomapsp: pointer to store the number of extents ++ * @block_size: alignment of extent offset and length ++ * ++ * This function decodes the opaque field of the layoutupdate4 structure ++ * in a layoutcommit request for the scsi layout driver. The field is ++ * actually an array of extents sent by the client. It also checks that ++ * the offset and length of each extent are aligned by @block_size. 
++ * ++ * Return values: ++ * %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid ++ * %nfserr_bad_xdr: The encoded array in @p is invalid ++ * %nfserr_inval: An unaligned extent found ++ * %nfserr_delay: Failed to allocate memory for @iomapp ++ */ ++__be32 + nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, +- u32 block_size) ++ int *nr_iomapsp, u32 block_size) + { + struct iomap *iomaps; + u32 nr_iomaps, expected, i; + + if (len < sizeof(u32)) { + dprintk("%s: extent array too small: %u\n", __func__, len); +- return -EINVAL; ++ return nfserr_bad_xdr; + } + + nr_iomaps = be32_to_cpup(p++); +@@ -201,13 +241,13 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p + if (len != expected) { + dprintk("%s: extent array size mismatch: %u/%u\n", + __func__, len, expected); +- return -EINVAL; ++ return nfserr_bad_xdr; + } + + iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL); + if (!iomaps) { + dprintk("%s: failed to allocate extent array\n", __func__); +- return -ENOMEM; ++ return nfserr_delay; + } + + for (i = 0; i < nr_iomaps; i++) { +@@ -229,8 +269,9 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p + } + + *iomapp = iomaps; +- return nr_iomaps; ++ *nr_iomapsp = nr_iomaps; ++ return nfs_ok; + fail: + kfree(iomaps); +- return -EINVAL; ++ return nfserr_inval; + } +--- a/fs/nfsd/blocklayoutxdr.h ++++ b/fs/nfsd/blocklayoutxdr.h +@@ -54,9 +54,9 @@ __be32 nfsd4_block_encode_getdeviceinfo( + const struct nfsd4_getdeviceinfo *gdp); + __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr, + const struct nfsd4_layoutget *lgp); +-int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, +- u32 block_size); +-int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, +- u32 block_size); ++__be32 nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, ++ struct iomap **iomapp, int *nr_iomapsp, u32 block_size); ++__be32 nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, ++ struct iomap **iomapp, int *nr_iomapsp, u32 block_size); + + #endif /* _NFSD_BLOCKLAYOUTXDR_H */ +--- a/fs/nfsd/nfsd.h ++++ b/fs/nfsd/nfsd.h +@@ -286,6 +286,7 @@ void nfsd_lockd_shutdown(void); + #define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN) + #define nfserr_locked cpu_to_be32(NFSERR_LOCKED) + #define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC) ++#define nfserr_delay cpu_to_be32(NFS4ERR_DELAY) + #define nfserr_badiomode cpu_to_be32(NFS4ERR_BADIOMODE) + #define nfserr_badlayout cpu_to_be32(NFS4ERR_BADLAYOUT) + #define nfserr_bad_session_digest cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST) diff --git a/queue-6.12/padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch b/queue-6.12/padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch new file mode 100644 index 0000000000..6531d76afb --- /dev/null +++ b/queue-6.12/padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch @@ -0,0 +1,44 @@ +From stable+bounces-188140-greg=kroah.com@vger.kernel.org Mon Oct 20 17:37:12 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 11:37:02 -0400 +Subject: padata: Reset next CPU when reorder sequence wraps around +To: stable@vger.kernel.org +Cc: Xiao Liang , Herbert Xu , Sasha Levin +Message-ID: <20251020153702.1820394-1-sashal@kernel.org> + +From: Xiao Liang + +[ Upstream commit 501302d5cee0d8e8ec2c4a5919c37e0df9abc99b ] + +When seq_nr wraps around, the next reorder job with seq 0 is hashed to +the first CPU in padata_do_serial(). Correspondingly, pd->cpu needs to be +reset to the first CPU when pd->processed wraps around.
Otherwise, if the +number of used CPUs is not a power of 2, padata_find_next() will be +checking a wrong list, hence deadlock. + +Fixes: 6fc4dbcf0276 ("padata: Replace delayed timer with immediate workqueue in padata_reorder") +Cc: +Signed-off-by: Xiao Liang +Signed-off-by: Herbert Xu +[ relocated fix to padata_find_next() using pd->processed and pd->cpu structure fields ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + kernel/padata.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/kernel/padata.c ++++ b/kernel/padata.c +@@ -290,7 +290,11 @@ static struct padata_priv *padata_find_n + if (remove_object) { + list_del_init(&padata->list); + ++pd->processed; +- pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false); ++ /* When sequence wraps around, reset to the first CPU. */ ++ if (unlikely(pd->processed == 0)) ++ pd->cpu = cpumask_first(pd->cpumask.pcpu); ++ else ++ pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false); + } + + spin_unlock(&reorder->lock); diff --git a/queue-6.12/pci-add-pci_vdevice_sub-helper-macro.patch b/queue-6.12/pci-add-pci_vdevice_sub-helper-macro.patch new file mode 100644 index 0000000000..6baf78ad60 --- /dev/null +++ b/queue-6.12/pci-add-pci_vdevice_sub-helper-macro.patch @@ -0,0 +1,51 @@ +From stable+bounces-188207-greg=kroah.com@vger.kernel.org Mon Oct 20 19:28:50 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 13:28:38 -0400 +Subject: PCI: Add PCI_VDEVICE_SUB helper macro +To: stable@vger.kernel.org +Cc: Piotr Kwapulinski , Przemek Kitszel , Bjorn Helgaas , Rafal Romanowski , Tony Nguyen , Sasha Levin +Message-ID: <20251020172841.1850940-1-sashal@kernel.org> + +From: Piotr Kwapulinski + +[ Upstream commit 208fff3f567e2a3c3e7e4788845e90245c3891b4 ] + +PCI_VDEVICE_SUB generates the pci_device_id struct layout for +the specific PCI device/subdevice. Private data may follow the +output. + +Reviewed-by: Przemek Kitszel +Signed-off-by: Piotr Kwapulinski +Acked-by: Bjorn Helgaas +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Stable-dep-of: a7075f501bd3 ("ixgbevf: fix mailbox API compatibility by negotiating supported features") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/pci.h | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/include/linux/pci.h ++++ b/include/linux/pci.h +@@ -1067,6 +1067,20 @@ struct pci_driver { + .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0 + + /** ++ * PCI_VDEVICE_SUB - describe a specific PCI device/subdevice in a short form ++ * @vend: the vendor name ++ * @dev: the 16 bit PCI Device ID ++ * @subvend: the 16 bit PCI Subvendor ID ++ * @subdev: the 16 bit PCI Subdevice ID ++ * ++ * Generate the pci_device_id struct layout for the specific PCI ++ * device/subdevice. Private data may follow the output. 
++ */ ++#define PCI_VDEVICE_SUB(vend, dev, subvend, subdev) \ ++ .vendor = PCI_VENDOR_ID_##vend, .device = (dev), \ ++ .subvendor = (subvend), .subdevice = (subdev), 0, 0 ++ ++/** + * PCI_DEVICE_DATA - macro used to describe a specific PCI device in very short form + * @vend: the vendor name (without PCI_VENDOR_ID_ prefix) + * @dev: the device name (without PCI_DEVICE_ID__ prefix) diff --git a/queue-6.12/phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch b/queue-6.12/phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch new file mode 100644 index 0000000000..88a16a4dd1 --- /dev/null +++ b/queue-6.12/phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch @@ -0,0 +1,265 @@ +From stable+bounces-188089-greg=kroah.com@vger.kernel.org Mon Oct 20 15:02:19 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:59:06 -0400 +Subject: phy: cadence: cdns-dphy: Fix PLL lock and O_CMN_READY polling +To: stable@vger.kernel.org +Cc: Devarsh Thakkar , Harikrishna Shenoy , Tomi Valkeinen , Vinod Koul , Sasha Levin +Message-ID: <20251020125906.1762647-2-sashal@kernel.org> + +From: Devarsh Thakkar + +[ Upstream commit 284fb19a3ffb1083c3ad9c00d29749d09dddb99c ] + +PLL lock and O_CMN_READY assertion can only happen after the common +state machine is enabled by programming the DPHY_CMN_SSM register, but +the driver was polling for them before the state machine was enabled, +which is incorrect. The required ordering follows the DPHY +initialization sequence in the J721E TRM [1], section "12.7.2.4.1.2.1 +Start-up Sequence Timing Diagram", which shows O_CMN_READY being polled +last, after the common configuration pin setup step that enables the +state machine (see "Table 12-1533. Common Configuration-Related Setup"). + +To fix this: +- Add new function callbacks for polling on PLL lock and O_CMN_READY + assertion. +- Since the state machine and clocks are only enabled in the power_on + callback, move the clock-related programming from the configure callback +to the power_on callback, and poll for PLL lock and O_CMN_READY assertion +after the state machine is enabled. +- The configure callback now only saves the PLL configuration received + from the client driver; it is applied later in the power_on callback. +- Add checks to ensure configure is called before power_on and that the + state machine is disabled before the power_on callback runs. +- Disable the state machine in power_off so that the client driver can + reconfigure the PLL with a power_off, configure, power_on sequence, as +sketched below.
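+
+A sketch of the resulting callback sequence (illustrative, not verbatim
+driver code):
+
+	cdns_dphy_configure()	/* validate and save the PLL cfg only */
+	cdns_dphy_power_on()	/* enable clocks, program the PLL, start
+				   the state machine, then poll PLL lock
+				   and O_CMN_READY */
+	cdns_dphy_power_off()	/* stop the state machine and disable
+				   clocks, allowing a fresh
+				   configure/power_on cycle */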
+ +[1]: https://www.ti.com/lit/zip/spruil1 + +Cc: stable@vger.kernel.org +Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support") +Signed-off-by: Devarsh Thakkar +Tested-by: Harikrishna Shenoy +Reviewed-by: Tomi Valkeinen +Link: https://lore.kernel.org/r/20250704125915.1224738-2-devarsht@ti.com +Signed-off-by: Vinod Koul +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/phy/cadence/cdns-dphy.c | 124 +++++++++++++++++++++++++++++----------- + 1 file changed, 92 insertions(+), 32 deletions(-) + +--- a/drivers/phy/cadence/cdns-dphy.c ++++ b/drivers/phy/cadence/cdns-dphy.c +@@ -100,6 +100,8 @@ struct cdns_dphy_ops { + void (*set_pll_cfg)(struct cdns_dphy *dphy, + const struct cdns_dphy_cfg *cfg); + unsigned long (*get_wakeup_time_ns)(struct cdns_dphy *dphy); ++ int (*wait_for_pll_lock)(struct cdns_dphy *dphy); ++ int (*wait_for_cmn_ready)(struct cdns_dphy *dphy); + }; + + struct cdns_dphy { +@@ -109,6 +111,8 @@ struct cdns_dphy { + struct clk *pll_ref_clk; + const struct cdns_dphy_ops *ops; + struct phy *phy; ++ bool is_configured; ++ bool is_powered; + }; + + /* Order of bands is important since the index is the band number. */ +@@ -195,6 +199,16 @@ static unsigned long cdns_dphy_get_wakeu + return dphy->ops->get_wakeup_time_ns(dphy); + } + ++static int cdns_dphy_wait_for_pll_lock(struct cdns_dphy *dphy) ++{ ++ return dphy->ops->wait_for_pll_lock ? dphy->ops->wait_for_pll_lock(dphy) : 0; ++} ++ ++static int cdns_dphy_wait_for_cmn_ready(struct cdns_dphy *dphy) ++{ ++ return dphy->ops->wait_for_cmn_ready ? dphy->ops->wait_for_cmn_ready(dphy) : 0; ++} ++ + static unsigned long cdns_dphy_ref_get_wakeup_time_ns(struct cdns_dphy *dphy) + { + /* Default wakeup time is 800 ns (in a simulated environment). */ +@@ -236,7 +250,6 @@ static unsigned long cdns_dphy_j721e_get + static void cdns_dphy_j721e_set_pll_cfg(struct cdns_dphy *dphy, + const struct cdns_dphy_cfg *cfg) + { +- u32 status; + + /* + * set the PWM and PLL Byteclk divider settings to recommended values +@@ -253,13 +266,6 @@ static void cdns_dphy_j721e_set_pll_cfg( + + writel(DPHY_TX_J721E_WIZ_LANE_RSTB, + dphy->regs + DPHY_TX_J721E_WIZ_RST_CTRL); +- +- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status, +- (status & DPHY_TX_WIZ_PLL_LOCK), 0, POLL_TIMEOUT_US); +- +- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status, +- (status & DPHY_TX_WIZ_O_CMN_READY), 0, +- POLL_TIMEOUT_US); + } + + static void cdns_dphy_j721e_set_psm_div(struct cdns_dphy *dphy, u8 div) +@@ -267,6 +273,23 @@ static void cdns_dphy_j721e_set_psm_div( + writel(div, dphy->regs + DPHY_TX_J721E_WIZ_PSM_FREQ); + } + ++static int cdns_dphy_j721e_wait_for_pll_lock(struct cdns_dphy *dphy) ++{ ++ u32 status; ++ ++ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status, ++ status & DPHY_TX_WIZ_PLL_LOCK, 0, POLL_TIMEOUT_US); ++} ++ ++static int cdns_dphy_j721e_wait_for_cmn_ready(struct cdns_dphy *dphy) ++{ ++ u32 status; ++ ++ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status, ++ status & DPHY_TX_WIZ_O_CMN_READY, 0, ++ POLL_TIMEOUT_US); ++} ++ + /* + * This is the reference implementation of DPHY hooks. 
Specific integration of + * this IP may have to re-implement some of them depending on how they decided +@@ -282,6 +305,8 @@ static const struct cdns_dphy_ops j721e_ + .get_wakeup_time_ns = cdns_dphy_j721e_get_wakeup_time_ns, + .set_pll_cfg = cdns_dphy_j721e_set_pll_cfg, + .set_psm_div = cdns_dphy_j721e_set_psm_div, ++ .wait_for_pll_lock = cdns_dphy_j721e_wait_for_pll_lock, ++ .wait_for_cmn_ready = cdns_dphy_j721e_wait_for_cmn_ready, + }; + + static int cdns_dphy_config_from_opts(struct phy *phy, +@@ -339,21 +364,36 @@ static int cdns_dphy_validate(struct phy + static int cdns_dphy_configure(struct phy *phy, union phy_configure_opts *opts) + { + struct cdns_dphy *dphy = phy_get_drvdata(phy); +- struct cdns_dphy_cfg cfg = { 0 }; +- int ret, band_ctrl; +- unsigned int reg; ++ int ret; + +- ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &cfg); +- if (ret) +- return ret; ++ ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &dphy->cfg); ++ if (!ret) ++ dphy->is_configured = true; ++ ++ return ret; ++} ++ ++static int cdns_dphy_power_on(struct phy *phy) ++{ ++ struct cdns_dphy *dphy = phy_get_drvdata(phy); ++ int ret; ++ u32 reg; ++ ++ if (!dphy->is_configured || dphy->is_powered) ++ return -EINVAL; ++ ++ clk_prepare_enable(dphy->psm_clk); ++ clk_prepare_enable(dphy->pll_ref_clk); + + /* + * Configure the internal PSM clk divider so that the DPHY has a + * 1MHz clk (or something close). + */ + ret = cdns_dphy_setup_psm(dphy); +- if (ret) +- return ret; ++ if (ret) { ++ dev_err(&dphy->phy->dev, "Failed to setup PSM with error %d\n", ret); ++ goto err_power_on; ++ } + + /* + * Configure attach clk lanes to data lanes: the DPHY has 2 clk lanes +@@ -368,40 +408,60 @@ static int cdns_dphy_configure(struct ph + * Configure the DPHY PLL that will be used to generate the TX byte + * clk. + */ +- cdns_dphy_set_pll_cfg(dphy, &cfg); ++ cdns_dphy_set_pll_cfg(dphy, &dphy->cfg); + +- band_ctrl = cdns_dphy_tx_get_band_ctrl(opts->mipi_dphy.hs_clk_rate); +- if (band_ctrl < 0) +- return band_ctrl; ++ ret = cdns_dphy_tx_get_band_ctrl(dphy->cfg.hs_clk_rate); ++ if (ret < 0) { ++ dev_err(&dphy->phy->dev, "Failed to get band control value with error %d\n", ret); ++ goto err_power_on; ++ } + +- reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, band_ctrl) | +- FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, band_ctrl); ++ reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, ret) | ++ FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, ret); + writel(reg, dphy->regs + DPHY_BAND_CFG); + +- return 0; +-} +- +-static int cdns_dphy_power_on(struct phy *phy) +-{ +- struct cdns_dphy *dphy = phy_get_drvdata(phy); +- +- clk_prepare_enable(dphy->psm_clk); +- clk_prepare_enable(dphy->pll_ref_clk); +- + /* Start TX state machine. */ + writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN, + dphy->regs + DPHY_CMN_SSM); + ++ ret = cdns_dphy_wait_for_pll_lock(dphy); ++ if (ret) { ++ dev_err(&dphy->phy->dev, "Failed to lock PLL with error %d\n", ret); ++ goto err_power_on; ++ } ++ ++ ret = cdns_dphy_wait_for_cmn_ready(dphy); ++ if (ret) { ++ dev_err(&dphy->phy->dev, "O_CMN_READY signal failed to assert with error %d\n", ++ ret); ++ goto err_power_on; ++ } ++ ++ dphy->is_powered = true; ++ + return 0; ++ ++err_power_on: ++ clk_disable_unprepare(dphy->pll_ref_clk); ++ clk_disable_unprepare(dphy->psm_clk); ++ ++ return ret; + } + + static int cdns_dphy_power_off(struct phy *phy) + { + struct cdns_dphy *dphy = phy_get_drvdata(phy); ++ u32 reg; + + clk_disable_unprepare(dphy->pll_ref_clk); + clk_disable_unprepare(dphy->psm_clk); + ++ /* Stop TX state machine. 
*/ ++ reg = readl(dphy->regs + DPHY_CMN_SSM); ++ writel(reg & ~DPHY_CMN_SSM_EN, dphy->regs + DPHY_CMN_SSM); ++ ++ dphy->is_powered = false; ++ + return 0; + } + diff --git a/queue-6.12/phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch b/queue-6.12/phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch new file mode 100644 index 0000000000..219343174c --- /dev/null +++ b/queue-6.12/phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch @@ -0,0 +1,58 @@ +From stable+bounces-188377-greg=kroah.com@vger.kernel.org Tue Oct 21 18:44:00 2025 +From: Sasha Levin +Date: Tue, 21 Oct 2025 12:43:33 -0400 +Subject: phy: cadence: cdns-dphy: Update calibration wait time for startup state machine +To: stable@vger.kernel.org +Cc: Devarsh Thakkar , Harikrishna Shenoy , Tomi Valkeinen , Vinod Koul , Sasha Levin +Message-ID: <20251021164333.2380694-3-sashal@kernel.org> + +From: Devarsh Thakkar + +[ Upstream commit 2c27aaee934a1b5229152fe33a14f1fdf50da143 ] + +Do read-modify-write so that we re-use the characterized reset value as +specified in TRM [1] to program calibration wait time which defines number +of cycles to wait for after startup state machine is in bandgap enable +state. + +This fixes PLL lock timeout error faced while using RPi DSI Panel on TI's +AM62L and J721E SoC since earlier calibration wait time was getting +overwritten to zero value thus failing the PLL to lockup and causing +timeout. + +[1] AM62P TRM (Section 14.8.6.3.2.1.1 DPHY_TX_DPHYTX_CMN0_CMN_DIG_TBIT2): +Link: https://www.ti.com/lit/pdf/spruj83 + +Cc: stable@vger.kernel.org +Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support") +Signed-off-by: Devarsh Thakkar +Tested-by: Harikrishna Shenoy +Reviewed-by: Tomi Valkeinen +Link: https://lore.kernel.org/r/20250704125915.1224738-3-devarsht@ti.com +Signed-off-by: Vinod Koul +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/phy/cadence/cdns-dphy.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/phy/cadence/cdns-dphy.c ++++ b/drivers/phy/cadence/cdns-dphy.c +@@ -30,6 +30,7 @@ + + #define DPHY_CMN_SSM DPHY_PMA_CMN(0x20) + #define DPHY_CMN_SSM_EN BIT(0) ++#define DPHY_CMN_SSM_CAL_WAIT_TIME GENMASK(8, 1) + #define DPHY_CMN_TX_MODE_EN BIT(9) + + #define DPHY_CMN_PWM DPHY_PMA_CMN(0x40) +@@ -421,7 +422,8 @@ static int cdns_dphy_power_on(struct phy + writel(reg, dphy->regs + DPHY_BAND_CFG); + + /* Start TX state machine. 
*/ +- writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN, ++ reg = readl(dphy->regs + DPHY_CMN_SSM); ++ writel((reg & DPHY_CMN_SSM_CAL_WAIT_TIME) | DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN, + dphy->regs + DPHY_CMN_SSM); + + ret = cdns_dphy_wait_for_pll_lock(dphy); diff --git a/queue-6.12/phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch b/queue-6.12/phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch new file mode 100644 index 0000000000..66cca436a5 --- /dev/null +++ b/queue-6.12/phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch @@ -0,0 +1,59 @@ +From stable+bounces-188088-greg=kroah.com@vger.kernel.org Mon Oct 20 15:04:37 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 08:59:05 -0400 +Subject: phy: cdns-dphy: Store hs_clk_rate and return it +To: stable@vger.kernel.org +Cc: Tomi Valkeinen , Aradhya Bhatia , Parth Pancholi , Jayesh Choudhary , Vinod Koul , Devarsh Thakkar , Sasha Levin +Message-ID: <20251020125906.1762647-1-sashal@kernel.org> + +From: Tomi Valkeinen + +[ Upstream commit 689a54acb56858c85de8c7285db82b8ae6dbf683 ] + +The DPHY driver does not return the actual hs_clk_rate, so the DSI +driver has no idea what clock was actually achieved. Set the realized +hs_clk_rate to the opts struct, so that the DSI driver gets it back. + +Reviewed-by: Aradhya Bhatia +Tested-by: Parth Pancholi +Tested-by: Jayesh Choudhary +Acked-by: Vinod Koul +Reviewed-by: Devarsh Thakkar +Signed-off-by: Tomi Valkeinen +Link: https://lore.kernel.org/r/20250723-cdns-dphy-hs-clk-rate-fix-v1-1-d4539d44cbe7@ideasonboard.com +Signed-off-by: Vinod Koul +Stable-dep-of: 284fb19a3ffb ("phy: cadence: cdns-dphy: Fix PLL lock and O_CMN_READY polling") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/phy/cadence/cdns-dphy.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/phy/cadence/cdns-dphy.c ++++ b/drivers/phy/cadence/cdns-dphy.c +@@ -79,6 +79,7 @@ struct cdns_dphy_cfg { + u8 pll_ipdiv; + u8 pll_opdiv; + u16 pll_fbdiv; ++ u32 hs_clk_rate; + unsigned int nlanes; + }; + +@@ -154,6 +155,9 @@ static int cdns_dsi_get_dphy_pll_cfg(str + cfg->pll_ipdiv, + pll_ref_hz); + ++ cfg->hs_clk_rate = div_u64((u64)pll_ref_hz * cfg->pll_fbdiv, ++ 2 * cfg->pll_opdiv * cfg->pll_ipdiv); ++ + return 0; + } + +@@ -297,6 +301,7 @@ static int cdns_dphy_config_from_opts(st + if (ret) + return ret; + ++ opts->hs_clk_rate = cfg->hs_clk_rate; + opts->wakeup = cdns_dphy_get_wakeup_time_ns(dphy) / 1000; + + return 0; diff --git a/queue-6.12/pm-runtime-add-new-devm-functions.patch b/queue-6.12/pm-runtime-add-new-devm-functions.patch new file mode 100644 index 0000000000..6c6b1210e4 --- /dev/null +++ b/queue-6.12/pm-runtime-add-new-devm-functions.patch @@ -0,0 +1,109 @@ +From stable+bounces-188094-greg=kroah.com@vger.kernel.org Mon Oct 20 15:02:47 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 09:02:38 -0400 +Subject: PM: runtime: Add new devm functions +To: stable@vger.kernel.org +Cc: "Bence Csókás" , "Rafael J. Wysocki" , "Sasha Levin" +Message-ID: <20251020130239.1763909-1-sashal@kernel.org> + +From: Bence Csókás + +[ Upstream commit 73db799bf5efc5a04654bb3ff6c9bf63a0dfa473 ] + +Add `devm_pm_runtime_set_active_enabled()` and +`devm_pm_runtime_get_noresume()` for simplifying +common cases in drivers. + +Signed-off-by: Bence Csókás +Link: https://patch.msgid.link/20250327195928.680771-3-csokas.bence@prolan.hu +Signed-off-by: Rafael J. 
Wysocki +Stable-dep-of: 0792c1984a45 ("iio: imu: inv_icm42600: Simplify pm_runtime setup") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/power/runtime.c | 44 +++++++++++++++++++++++++++++++++++++++++++ + include/linux/pm_runtime.h | 4 +++ + 2 files changed, 48 insertions(+) + +--- a/drivers/base/power/runtime.c ++++ b/drivers/base/power/runtime.c +@@ -1554,6 +1554,32 @@ out: + } + EXPORT_SYMBOL_GPL(pm_runtime_enable); + ++static void pm_runtime_set_suspended_action(void *data) ++{ ++ pm_runtime_set_suspended(data); ++} ++ ++/** ++ * devm_pm_runtime_set_active_enabled - set_active version of devm_pm_runtime_enable. ++ * ++ * @dev: Device to handle. ++ */ ++int devm_pm_runtime_set_active_enabled(struct device *dev) ++{ ++ int err; ++ ++ err = pm_runtime_set_active(dev); ++ if (err) ++ return err; ++ ++ err = devm_add_action_or_reset(dev, pm_runtime_set_suspended_action, dev); ++ if (err) ++ return err; ++ ++ return devm_pm_runtime_enable(dev); ++} ++EXPORT_SYMBOL_GPL(devm_pm_runtime_set_active_enabled); ++ + static void pm_runtime_disable_action(void *data) + { + pm_runtime_dont_use_autosuspend(data); +@@ -1576,6 +1602,24 @@ int devm_pm_runtime_enable(struct device + } + EXPORT_SYMBOL_GPL(devm_pm_runtime_enable); + ++static void pm_runtime_put_noidle_action(void *data) ++{ ++ pm_runtime_put_noidle(data); ++} ++ ++/** ++ * devm_pm_runtime_get_noresume - devres-enabled version of pm_runtime_get_noresume. ++ * ++ * @dev: Device to handle. ++ */ ++int devm_pm_runtime_get_noresume(struct device *dev) ++{ ++ pm_runtime_get_noresume(dev); ++ ++ return devm_add_action_or_reset(dev, pm_runtime_put_noidle_action, dev); ++} ++EXPORT_SYMBOL_GPL(devm_pm_runtime_get_noresume); ++ + /** + * pm_runtime_forbid - Block runtime PM of a device. + * @dev: Device to handle. +--- a/include/linux/pm_runtime.h ++++ b/include/linux/pm_runtime.h +@@ -94,7 +94,9 @@ extern void pm_runtime_new_link(struct d + extern void pm_runtime_drop_link(struct device_link *link); + extern void pm_runtime_release_supplier(struct device_link *link); + ++int devm_pm_runtime_set_active_enabled(struct device *dev); + extern int devm_pm_runtime_enable(struct device *dev); ++int devm_pm_runtime_get_noresume(struct device *dev); + + /** + * pm_suspend_ignore_children - Set runtime PM behavior regarding children. 
+@@ -278,7 +280,9 @@ static inline void __pm_runtime_disable( + static inline void pm_runtime_allow(struct device *dev) {} + static inline void pm_runtime_forbid(struct device *dev) {} + ++static inline int devm_pm_runtime_set_active_enabled(struct device *dev) { return 0; } + static inline int devm_pm_runtime_enable(struct device *dev) { return 0; } ++static inline int devm_pm_runtime_get_noresume(struct device *dev) { return 0; } + + static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} + static inline void pm_runtime_get_noresume(struct device *dev) {} diff --git a/queue-6.12/series b/queue-6.12/series index 925cdd006b..9621ee9334 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -91,3 +91,42 @@ hid-multitouch-fix-name-of-stylus-input-devices.patch asoc-amd-sdw_utils-avoid-null-deref-when-devm_kaspri.patch selftests-arg_parsing-ensure-data-is-flushed-to-disk.patch nvme-tcp-handle-tls-partially-sent-records-in-write_.patch +hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch +xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch +xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch +phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch +phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch +pm-runtime-add-new-devm-functions.patch +iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch +iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch +nfsd-use-correct-error-code-when-decoding-extents.patch +nfsd-drop-dprintk-in-blocklayout-xdr-functions.patch +nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch +nfsd-minor-cleanup-in-layoutcommit-processing.patch +nfsd-implement-large-extent-array-support-in-pnfs.patch +nfsd-fix-last-write-offset-handling-in-layoutcommit.patch +wifi-rtw89-avoid-possible-tx-wait-initialization-race.patch +xfs-use-deferred-intent-items-for-reaping-crosslinked-blocks.patch +padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch +md-raid0-handle-bio_split-errors.patch +md-raid1-handle-bio_split-errors.patch +md-raid10-handle-bio_split-errors.patch +md-fix-mssing-blktrace-bio-split-events.patch +x86-resctrl-refactor-resctrl_arch_rmid_read.patch +x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch +d_alloc_parallel-set-dcache_par_lookup-earlier.patch +vfs-don-t-leak-disconnected-dentries-on-umount.patch +pci-add-pci_vdevice_sub-helper-macro.patch +ixgbevf-add-support-for-intel-r-e610-device.patch +ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch +ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch +tcp-convert-to-dev_net_rcu.patch +tcp-cache-rtax_quickack-metric-in-a-hot-cache-line.patch +net-dst-add-four-helpers-to-annotate-data-races-around-dst-dev.patch +ipv4-adopt-dst_dev-skb_dst_dev-and-skb_dst_dev_net.patch +net-add-locking-to-protect-skb-dev-access-in-ip_output.patch +mptcp-call-dst_release-in-mptcp_active_enable.patch +mptcp-use-__sk_dst_get-and-dst_dev_rcu-in-mptcp_active_enable.patch +mptcp-reset-blackhole-on-success-with-non-loopback-ifaces.patch +phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch +nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch diff --git a/queue-6.12/tcp-cache-rtax_quickack-metric-in-a-hot-cache-line.patch b/queue-6.12/tcp-cache-rtax_quickack-metric-in-a-hot-cache-line.patch new file mode 100644 index 0000000000..0881fd9fce --- /dev/null +++ 
b/queue-6.12/tcp-cache-rtax_quickack-metric-in-a-hot-cache-line.patch @@ -0,0 +1,81 @@ +From stable+bounces-188148-greg=kroah.com@vger.kernel.org Mon Oct 20 17:45:17 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 11:44:03 -0400 +Subject: tcp: cache RTAX_QUICKACK metric in a hot cache line +To: stable@vger.kernel.org +Cc: Eric Dumazet , Jason Xing , Neal Cardwell , Kuniyuki Iwashima , Paolo Abeni , Sasha Levin +Message-ID: <20251020154409.1823664-2-sashal@kernel.org> + +From: Eric Dumazet + +[ Upstream commit 15492700ac41459b54a6683490adcee350ab11e3 ] + +tcp_in_quickack_mode() is called from the input path for small packets. + +It calls __sk_dst_get(), which reads sk->sk_dst_cache, which has been +put in the sock_read_tx group (for good reasons). + +Then dst_metric(dst, RTAX_QUICKACK) also incurs extra cache line misses. + +Cache RTAX_QUICKACK in icsk->icsk_ack.dst_quick_ack so these cache lines +are no longer pulled in when a delayed ACK is scheduled. + +After this patch, the TCP receive path no longer accesses the +sock_read_tx group. + +Signed-off-by: Eric Dumazet +Reviewed-by: Jason Xing +Reviewed-by: Neal Cardwell +Reviewed-by: Kuniyuki Iwashima +Link: https://patch.msgid.link/20250312083907.1931644-1-edumazet@google.com +Signed-off-by: Paolo Abeni +Stable-dep-of: 833d4313bc1e ("mptcp: reset blackhole on success with non-loopback ifaces") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_connection_sock.h | 3 ++- + net/core/sock.c | 6 +++++- + net/ipv4/tcp_input.c | 3 +-- + 3 files changed, 8 insertions(+), 4 deletions(-) + +--- a/include/net/inet_connection_sock.h ++++ b/include/net/inet_connection_sock.h +@@ -116,7 +116,8 @@ struct inet_connection_sock { + #define ATO_BITS 8 + __u32 ato:ATO_BITS, /* Predicted tick of soft clock */ + lrcv_flowlabel:20, /* last received ipv6 flowlabel */ +- unused:4; ++ dst_quick_ack:1, /* cache dst RTAX_QUICKACK */ ++ unused:3; + unsigned long timeout; /* Currently scheduled timeout */ + __u32 lrcvtime; /* timestamp of last received data packet */ + __u16 last_seg_size; /* Size of last incoming segment */ +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -2547,8 +2547,12 @@ void sk_setup_caps(struct sock *sk, stru + u32 max_segs = 1; + + sk->sk_route_caps = dst->dev->features; +- if (sk_is_tcp(sk)) ++ if (sk_is_tcp(sk)) { ++ struct inet_connection_sock *icsk = inet_csk(sk); ++ + sk->sk_route_caps |= NETIF_F_GSO; ++ icsk->icsk_ack.dst_quick_ack = dst_metric(dst, RTAX_QUICKACK); ++ } + if (sk->sk_route_caps & NETIF_F_GSO) + sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; + if (unlikely(sk->sk_gso_disabled)) +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -331,9 +331,8 @@ static void tcp_enter_quickack_mode(stru + static bool tcp_in_quickack_mode(struct sock *sk) + { + const struct inet_connection_sock *icsk = inet_csk(sk); +- const struct dst_entry *dst = __sk_dst_get(sk); + +- return (dst && dst_metric(dst, RTAX_QUICKACK)) || ++ return icsk->icsk_ack.dst_quick_ack || + (icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk)); + } + diff --git a/queue-6.12/tcp-convert-to-dev_net_rcu.patch b/queue-6.12/tcp-convert-to-dev_net_rcu.patch new file mode 100644 index 0000000000..a7d0a938db --- /dev/null +++ b/queue-6.12/tcp-convert-to-dev_net_rcu.patch @@ -0,0 +1,197 @@ +From stable+bounces-188147-greg=kroah.com@vger.kernel.org Mon Oct 20 17:44:21 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 11:44:02 -0400 +Subject: tcp: convert to dev_net_rcu() +To: stable@vger.kernel.org +Cc: Eric Dumazet , Kuniyuki Iwashima , Jakub Kicinski ,
Sasha Levin +Message-ID: <20251020154409.1823664-1-sashal@kernel.org> + +From: Eric Dumazet + +[ Upstream commit e7b9ecce562ca6a1de32c56c597fa45e08c44ec0 ] + +TCP uses of dev_net() are under RCU protection, change them +to dev_net_rcu() to get LOCKDEP support. + +Signed-off-by: Eric Dumazet +Reviewed-by: Kuniyuki Iwashima +Link: https://patch.msgid.link/20250301201424.2046477-4-edumazet@google.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 833d4313bc1e ("mptcp: reset blackhole on success with non-loopback ifaces") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet6_hashtables.h | 2 +- + include/net/inet_hashtables.h | 2 +- + net/ipv4/tcp_ipv4.c | 12 ++++++------ + net/ipv4/tcp_metrics.c | 6 +++--- + net/ipv6/tcp_ipv6.c | 22 +++++++++++----------- + 5 files changed, 22 insertions(+), 22 deletions(-) + +--- a/include/net/inet6_hashtables.h ++++ b/include/net/inet6_hashtables.h +@@ -150,7 +150,7 @@ static inline struct sock *__inet6_looku + int iif, int sdif, + bool *refcounted) + { +- struct net *net = dev_net(skb_dst(skb)->dev); ++ struct net *net = dev_net_rcu(skb_dst(skb)->dev); + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct sock *sk; + +--- a/include/net/inet_hashtables.h ++++ b/include/net/inet_hashtables.h +@@ -492,7 +492,7 @@ static inline struct sock *__inet_lookup + const int sdif, + bool *refcounted) + { +- struct net *net = dev_net(skb_dst(skb)->dev); ++ struct net *net = dev_net_rcu(skb_dst(skb)->dev); + const struct iphdr *iph = ip_hdr(skb); + struct sock *sk; + +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -494,14 +494,14 @@ int tcp_v4_err(struct sk_buff *skb, u32 + { + const struct iphdr *iph = (const struct iphdr *)skb->data; + struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); +- struct tcp_sock *tp; ++ struct net *net = dev_net_rcu(skb->dev); + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; +- struct sock *sk; + struct request_sock *fastopen; ++ struct tcp_sock *tp; + u32 seq, snd_una; ++ struct sock *sk; + int err; +- struct net *net = dev_net(skb->dev); + + sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, + iph->daddr, th->dest, iph->saddr, +@@ -786,7 +786,7 @@ static void tcp_v4_send_reset(const stru + arg.iov[0].iov_base = (unsigned char *)&rep; + arg.iov[0].iov_len = sizeof(rep.th); + +- net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); ++ net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev); + + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, &aoh)) +@@ -1965,7 +1965,7 @@ EXPORT_SYMBOL(tcp_v4_do_rcv); + + int tcp_v4_early_demux(struct sk_buff *skb) + { +- struct net *net = dev_net(skb->dev); ++ struct net *net = dev_net_rcu(skb->dev); + const struct iphdr *iph; + const struct tcphdr *th; + struct sock *sk; +@@ -2176,7 +2176,7 @@ static void tcp_v4_fill_cb(struct sk_buf + + int tcp_v4_rcv(struct sk_buff *skb) + { +- struct net *net = dev_net(skb->dev); ++ struct net *net = dev_net_rcu(skb->dev); + enum skb_drop_reason drop_reason; + int sdif = inet_sdif(skb); + int dif = inet_iif(skb); +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -170,7 +170,7 @@ static struct tcp_metrics_block *tcpm_ne + bool reclaim = false; + + spin_lock_bh(&tcp_metrics_lock); +- net = dev_net(dst->dev); ++ net = dev_net_rcu(dst->dev); + + /* While waiting for the spin-lock the cache might have been populated + * with this entry and so we have to check again. 
+@@ -273,7 +273,7 @@ static struct tcp_metrics_block *__tcp_g + return NULL; + } + +- net = dev_net(dst->dev); ++ net = dev_net_rcu(dst->dev); + hash ^= net_hash_mix(net); + hash = hash_32(hash, tcp_metrics_hash_log); + +@@ -318,7 +318,7 @@ static struct tcp_metrics_block *tcp_get + else + return NULL; + +- net = dev_net(dst->dev); ++ net = dev_net_rcu(dst->dev); + hash ^= net_hash_mix(net); + hash = hash_32(hash, tcp_metrics_hash_log); + +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -376,7 +376,7 @@ static int tcp_v6_err(struct sk_buff *sk + { + const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; + const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); +- struct net *net = dev_net(skb->dev); ++ struct net *net = dev_net_rcu(skb->dev); + struct request_sock *fastopen; + struct ipv6_pinfo *np; + struct tcp_sock *tp; +@@ -864,16 +864,16 @@ static void tcp_v6_send_response(const s + int oif, int rst, u8 tclass, __be32 label, + u32 priority, u32 txhash, struct tcp_key *key) + { +- const struct tcphdr *th = tcp_hdr(skb); +- struct tcphdr *t1; +- struct sk_buff *buff; +- struct flowi6 fl6; +- struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); +- struct sock *ctl_sk = net->ipv6.tcp_sk; ++ struct net *net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev); + unsigned int tot_len = sizeof(struct tcphdr); ++ struct sock *ctl_sk = net->ipv6.tcp_sk; ++ const struct tcphdr *th = tcp_hdr(skb); + __be32 mrst = 0, *topt; + struct dst_entry *dst; +- __u32 mark = 0; ++ struct sk_buff *buff; ++ struct tcphdr *t1; ++ struct flowi6 fl6; ++ u32 mark = 0; + + if (tsecr) + tot_len += TCPOLEN_TSTAMP_ALIGNED; +@@ -1036,7 +1036,7 @@ static void tcp_v6_send_reset(const stru + if (!sk && !ipv6_unicast_destination(skb)) + return; + +- net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); ++ net = sk ? 
sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev); + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) + return; +@@ -1739,6 +1739,7 @@ static void tcp_v6_fill_cb(struct sk_buf + + INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) + { ++ struct net *net = dev_net_rcu(skb->dev); + enum skb_drop_reason drop_reason; + int sdif = inet6_sdif(skb); + int dif = inet6_iif(skb); +@@ -1748,7 +1749,6 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(s + bool refcounted; + int ret; + u32 isn; +- struct net *net = dev_net(skb->dev); + + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; + if (skb->pkt_type != PACKET_HOST) +@@ -1999,7 +1999,7 @@ do_time_wait: + + void tcp_v6_early_demux(struct sk_buff *skb) + { +- struct net *net = dev_net(skb->dev); ++ struct net *net = dev_net_rcu(skb->dev); + const struct ipv6hdr *hdr; + const struct tcphdr *th; + struct sock *sk; diff --git a/queue-6.12/vfs-don-t-leak-disconnected-dentries-on-umount.patch b/queue-6.12/vfs-don-t-leak-disconnected-dentries-on-umount.patch new file mode 100644 index 0000000000..cc2c984012 --- /dev/null +++ b/queue-6.12/vfs-don-t-leak-disconnected-dentries-on-umount.patch @@ -0,0 +1,57 @@ +From stable+bounces-188212-greg=kroah.com@vger.kernel.org Mon Oct 20 19:29:10 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 13:28:59 -0400 +Subject: vfs: Don't leak disconnected dentries on umount +To: stable@vger.kernel.org +Cc: Jan Kara , syzbot+1d79ebe5383fc016cf07@syzkaller.appspotmail.com, Christian Brauner , Sasha Levin +Message-ID: <20251020172900.1851256-2-sashal@kernel.org> + +From: Jan Kara + +[ Upstream commit 56094ad3eaa21e6621396cc33811d8f72847a834 ] + +When a user calls open_by_handle_at() on an inode that is not cached, we +create a disconnected dentry for it. If such a dentry is a directory, +exportfs_decode_fh_raw() will then try to connect this dentry to the +dentry tree through reconnect_path(). It may happen for various reasons +(such as a corrupted fs or a race with rename) that the call to +lookup_one_unlocked() in reconnect_one() fails to find the dentry we +are trying to reconnect and instead creates a new dentry under the +parent. This dentry will not be marked as disconnected although the +parent may well still be disconnected (at least when the inconsistency +happened because the fs is corrupted and .. doesn't point to the real +parent directory). This creates an inconsistency in the disconnected +flags, but AFAICS it was mostly harmless. At least until commit +f1ee616214cb ("VFS: don't keep disconnected dentries on d_anon"), which +stopped adding most disconnected dentries to the sb->s_anon list. Since +that commit, cleanup of disconnected dentries implicitly relies on the +fact that dput() will immediately reclaim such dentries. However, when a +leaf dentry isn't marked as disconnected, as in the scenario described +above, the reclaim doesn't happen and the dentries are "leaked". Memory +reclaim can eventually reclaim them, but otherwise they stay in memory, +and if umount comes first we hit the infamous "Busy inodes after +unmount" bug. Make sure all dentries created under a disconnected parent +are marked as disconnected as well.
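+
+A sketch of the trigger path described above (illustrative, not
+verbatim kernel code):
+
+	open_by_handle_at()	/* inode not in the dcache */
+	  -> disconnected directory dentry is created
+	  -> exportfs_decode_fh_raw() -> reconnect_path()
+	       -> lookup_one_unlocked()	/* misses and allocates a child
+					   dentry that, before this patch,
+					   was not marked
+					   DCACHE_DISCONNECTED */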
+ +Reported-by: syzbot+1d79ebe5383fc016cf07@syzkaller.appspotmail.com +Fixes: f1ee616214cb ("VFS: don't keep disconnected dentries on d_anon") +CC: stable@vger.kernel.org +Signed-off-by: Jan Kara +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/dcache.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -2486,6 +2486,8 @@ struct dentry *d_alloc_parallel(struct d + spin_lock(&parent->d_lock); + new->d_parent = dget_dlock(parent); + hlist_add_head(&new->d_sib, &parent->d_children); ++ if (parent->d_flags & DCACHE_DISCONNECTED) ++ new->d_flags |= DCACHE_DISCONNECTED; + spin_unlock(&parent->d_lock); + + retry: diff --git a/queue-6.12/wifi-rtw89-avoid-possible-tx-wait-initialization-race.patch b/queue-6.12/wifi-rtw89-avoid-possible-tx-wait-initialization-race.patch new file mode 100644 index 0000000000..9833645e1d --- /dev/null +++ b/queue-6.12/wifi-rtw89-avoid-possible-tx-wait-initialization-race.patch @@ -0,0 +1,230 @@ +From stable+bounces-188156-greg=kroah.com@vger.kernel.org Mon Oct 20 17:50:51 2025 +From: Sasha Levin +Date: Mon, 20 Oct 2025 11:47:33 -0400 +Subject: wifi: rtw89: avoid possible TX wait initialization race +To: stable@vger.kernel.org +Cc: Fedor Pchelkin , Ping-Ke Shih , Sasha Levin +Message-ID: <20251020154733.1824513-1-sashal@kernel.org> + +From: Fedor Pchelkin + +[ Upstream commit c24248ed78f33ea299ea61d105355ba47157d49f ] + +The value of skb_data->wait indicates whether skb is passed on to the +core mac80211 stack or released by the driver itself. Make sure that by +the time skb is added to txwd queue and becomes visible to the completing +side, it has already allocated and initialized TX wait related data (in +case it's needed). + +This is found by code review and addresses a possible race scenario +described below: + + Waiting thread Completing thread + +rtw89_core_send_nullfunc() + rtw89_core_tx_write_link() + ... + rtw89_pci_txwd_submit() + skb_data->wait = NULL + /* add skb to the queue */ + skb_queue_tail(&txwd->queue, skb) + + /* another thread (e.g. rtw89_ops_tx) performs TX kick off for the same queue */ + + rtw89_pci_napi_poll() + ... + rtw89_pci_release_txwd_skb() + /* get skb from the queue */ + skb_unlink(skb, &txwd->queue) + rtw89_pci_tx_status() + rtw89_core_tx_wait_complete() + /* use incorrect skb_data->wait */ + rtw89_core_tx_kick_off_and_wait() + /* assign skb_data->wait but too late */ + +Found by Linux Verification Center (linuxtesting.org). 
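+
+In short, the fix enforces this ordering (the same two lines appear in
+the hunk below):
+
+	rcu_assign_pointer(skb_data->wait, wait);	/* publish wait first */
+	ret = rtw89_hci_tx_write(rtwdev, &tx_req);	/* then queue the skb */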
+ +Fixes: 1ae5ca615285 ("wifi: rtw89: add function to wait for completion of TX skbs") +Cc: stable@vger.kernel.org +Signed-off-by: Fedor Pchelkin +Acked-by: Ping-Ke Shih +Signed-off-by: Ping-Ke Shih +Link: https://patch.msgid.link/20250919210852.823912-3-pchelkin@ispras.ru +[ adapted rtw89_core_tx_write_link() modifications to rtw89_core_tx_write() ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireless/realtek/rtw89/core.c | 39 ++++++++++++++------------ + drivers/net/wireless/realtek/rtw89/core.h | 6 ++-- + drivers/net/wireless/realtek/rtw89/mac80211.c | 2 - + drivers/net/wireless/realtek/rtw89/pci.c | 2 - + 4 files changed, 26 insertions(+), 23 deletions(-) + +--- a/drivers/net/wireless/realtek/rtw89/core.c ++++ b/drivers/net/wireless/realtek/rtw89/core.c +@@ -978,25 +978,14 @@ void rtw89_core_tx_kick_off(struct rtw89 + } + + int rtw89_core_tx_kick_off_and_wait(struct rtw89_dev *rtwdev, struct sk_buff *skb, +- int qsel, unsigned int timeout) ++ struct rtw89_tx_wait_info *wait, int qsel, ++ unsigned int timeout) + { +- struct rtw89_tx_skb_data *skb_data = RTW89_TX_SKB_CB(skb); +- struct rtw89_tx_wait_info *wait; + unsigned long time_left; + int ret = 0; + + lockdep_assert_wiphy(rtwdev->hw->wiphy); + +- wait = kzalloc(sizeof(*wait), GFP_KERNEL); +- if (!wait) { +- rtw89_core_tx_kick_off(rtwdev, qsel); +- return 0; +- } +- +- init_completion(&wait->completion); +- wait->skb = skb; +- rcu_assign_pointer(skb_data->wait, wait); +- + rtw89_core_tx_kick_off(rtwdev, qsel); + time_left = wait_for_completion_timeout(&wait->completion, + msecs_to_jiffies(timeout)); +@@ -1057,10 +1046,12 @@ int rtw89_h2c_tx(struct rtw89_dev *rtwde + } + + int rtw89_core_tx_write(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif, +- struct ieee80211_sta *sta, struct sk_buff *skb, int *qsel) ++ struct ieee80211_sta *sta, struct sk_buff *skb, int *qsel, ++ struct rtw89_tx_wait_info *wait) + { + struct rtw89_sta *rtwsta = sta_to_rtwsta_safe(sta); + struct rtw89_vif *rtwvif = vif_to_rtwvif(vif); ++ struct rtw89_tx_skb_data *skb_data = RTW89_TX_SKB_CB(skb); + struct rtw89_core_tx_request tx_req = {0}; + struct rtw89_sta_link *rtwsta_link = NULL; + struct rtw89_vif_link *rtwvif_link; +@@ -1093,6 +1084,8 @@ int rtw89_core_tx_write(struct rtw89_dev + rtw89_core_tx_update_desc_info(rtwdev, &tx_req); + rtw89_core_tx_wake(rtwdev, &tx_req); + ++ rcu_assign_pointer(skb_data->wait, wait); ++ + ret = rtw89_hci_tx_write(rtwdev, &tx_req); + if (ret) { + rtw89_err(rtwdev, "failed to transmit skb to HCI\n"); +@@ -2908,7 +2901,7 @@ static void rtw89_core_txq_push(struct r + goto out; + } + rtw89_core_txq_check_agg(rtwdev, rtwtxq, skb); +- ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, NULL); ++ ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, NULL, NULL); + if (ret) { + rtw89_err(rtwdev, "failed to push txq: %d\n", ret); + ieee80211_free_txskb(rtwdev->hw, skb); +@@ -3084,7 +3077,7 @@ bottom: + skb_queue_walk_safe(&rtwsta->roc_queue, skb, tmp) { + skb_unlink(skb, &rtwsta->roc_queue); + +- ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, &qsel); ++ ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, &qsel, NULL); + if (ret) { + rtw89_warn(rtwdev, "pending tx failed with %d\n", ret); + dev_kfree_skb_any(skb); +@@ -3106,6 +3099,7 @@ static int rtw89_core_send_nullfunc(stru + struct rtw89_vif_link *rtwvif_link, bool qos, bool ps) + { + struct ieee80211_vif *vif = rtwvif_link_to_vif(rtwvif_link); ++ struct rtw89_tx_wait_info *wait; + struct ieee80211_sta *sta; + struct ieee80211_hdr *hdr; + struct sk_buff 
*skb; +@@ -3114,6 +3108,12 @@ static int rtw89_core_send_nullfunc(stru + if (vif->type != NL80211_IFTYPE_STATION || !vif->cfg.assoc) + return 0; + ++ wait = kzalloc(sizeof(*wait), GFP_KERNEL); ++ if (!wait) ++ return -ENOMEM; ++ ++ init_completion(&wait->completion); ++ + rcu_read_lock(); + sta = ieee80211_find_sta(vif, vif->cfg.ap_addr); + if (!sta) { +@@ -3127,11 +3127,13 @@ static int rtw89_core_send_nullfunc(stru + goto out; + } + ++ wait->skb = skb; ++ + hdr = (struct ieee80211_hdr *)skb->data; + if (ps) + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); + +- ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, &qsel); ++ ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, &qsel, wait); + if (ret) { + rtw89_warn(rtwdev, "nullfunc transmit failed: %d\n", ret); + dev_kfree_skb_any(skb); +@@ -3140,10 +3142,11 @@ static int rtw89_core_send_nullfunc(stru + + rcu_read_unlock(); + +- return rtw89_core_tx_kick_off_and_wait(rtwdev, skb, qsel, ++ return rtw89_core_tx_kick_off_and_wait(rtwdev, skb, wait, qsel, + RTW89_ROC_TX_TIMEOUT); + out: + rcu_read_unlock(); ++ kfree(wait); + + return ret; + } +--- a/drivers/net/wireless/realtek/rtw89/core.h ++++ b/drivers/net/wireless/realtek/rtw89/core.h +@@ -6818,12 +6818,14 @@ static inline bool rtw89_is_rtl885xb(str + } + + int rtw89_core_tx_write(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif, +- struct ieee80211_sta *sta, struct sk_buff *skb, int *qsel); ++ struct ieee80211_sta *sta, struct sk_buff *skb, int *qsel, ++ struct rtw89_tx_wait_info *wait); + int rtw89_h2c_tx(struct rtw89_dev *rtwdev, + struct sk_buff *skb, bool fwdl); + void rtw89_core_tx_kick_off(struct rtw89_dev *rtwdev, u8 qsel); + int rtw89_core_tx_kick_off_and_wait(struct rtw89_dev *rtwdev, struct sk_buff *skb, +- int qsel, unsigned int timeout); ++ struct rtw89_tx_wait_info *wait, int qsel, ++ unsigned int timeout); + void rtw89_core_fill_txdesc(struct rtw89_dev *rtwdev, + struct rtw89_tx_desc_info *desc_info, + void *txdesc); +--- a/drivers/net/wireless/realtek/rtw89/mac80211.c ++++ b/drivers/net/wireless/realtek/rtw89/mac80211.c +@@ -36,7 +36,7 @@ static void rtw89_ops_tx(struct ieee8021 + return; + } + +- ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, &qsel); ++ ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, &qsel, NULL); + if (ret) { + rtw89_err(rtwdev, "failed to transmit skb: %d\n", ret); + ieee80211_free_txskb(hw, skb); +--- a/drivers/net/wireless/realtek/rtw89/pci.c ++++ b/drivers/net/wireless/realtek/rtw89/pci.c +@@ -1366,7 +1366,6 @@ static int rtw89_pci_txwd_submit(struct + struct pci_dev *pdev = rtwpci->pdev; + struct sk_buff *skb = tx_req->skb; + struct rtw89_pci_tx_data *tx_data = RTW89_PCI_TX_SKB_CB(skb); +- struct rtw89_tx_skb_data *skb_data = RTW89_TX_SKB_CB(skb); + bool en_wd_info = desc_info->en_wd_info; + u32 txwd_len; + u32 txwp_len; +@@ -1382,7 +1381,6 @@ static int rtw89_pci_txwd_submit(struct + } + + tx_data->dma = dma; +- rcu_assign_pointer(skb_data->wait, NULL); + + txwp_len = sizeof(*txwp_info); + txwd_len = chip->txwd_body_size; diff --git a/queue-6.12/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch b/queue-6.12/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch new file mode 100644 index 0000000000..78805c815b --- /dev/null +++ b/queue-6.12/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch @@ -0,0 +1,149 @@ +From stable+bounces-188200-greg=kroah.com@vger.kernel.org Mon Oct 20 18:41:02 2025 +From: Sasha Levin 
+Date: Mon, 20 Oct 2025 12:38:53 -0400 +Subject: x86/resctrl: Fix miscount of bandwidth event when reactivating previously unavailable RMID +To: stable@vger.kernel.org +Cc: Babu Moger , "Borislav Petkov (AMD)" , Reinette Chatre , Sasha Levin +Message-ID: <20251020163853.1841192-2-sashal@kernel.org> + +From: Babu Moger + +[ Upstream commit 15292f1b4c55a3a7c940dbcb6cb8793871ed3d92 ] + +Users can create as many monitoring groups as the number of RMIDs supported +by the hardware. However, on AMD systems, only a limited number of RMIDs +are guaranteed to be actively tracked by the hardware. RMIDs that exceed +this limit are placed in an "Unavailable" state. + +When a bandwidth counter is read for such an RMID, the hardware sets +MSR_IA32_QM_CTR.Unavailable (bit 62). When such an RMID starts being tracked +again the hardware counter is reset to zero. MSR_IA32_QM_CTR.Unavailable +remains set on first read after tracking re-starts and is clear on all +subsequent reads as long as the RMID is tracked. + +resctrl miscounts the bandwidth events after an RMID transitions from the +"Unavailable" state back to being tracked. This happens because when the +hardware starts counting again after resetting the counter to zero, resctrl +in turn compares the new count against the counter value stored from the +previous time the RMID was tracked. + +This results in resctrl computing an event value that is either undercounting +(when new counter is more than stored counter) or a mistaken overflow (when +new counter is less than stored counter). + +Reset the stored value (arch_mbm_state::prev_msr) of MSR_IA32_QM_CTR to +zero whenever the RMID is in the "Unavailable" state to ensure accurate +counting after the RMID resets to zero when it starts to be tracked again. + +Example scenario that results in mistaken overflow +================================================== +1. The resctrl filesystem is mounted, and a task is assigned to a + monitoring group. + + $mount -t resctrl resctrl /sys/fs/resctrl + $mkdir /sys/fs/resctrl/mon_groups/test1/ + $echo 1234 > /sys/fs/resctrl/mon_groups/test1/tasks + + $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes + 21323 <- Total bytes on domain 0 + "Unavailable" <- Total bytes on domain 1 + + Task is running on domain 0. Counter on domain 1 is "Unavailable". + +2. The task runs on domain 0 for a while and then moves to domain 1. The + counter starts incrementing on domain 1. + + $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes + 7345357 <- Total bytes on domain 0 + 4545 <- Total bytes on domain 1 + +3. At some point, the RMID in domain 0 transitions to the "Unavailable" + state because the task is no longer executing in that domain. + + $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes + "Unavailable" <- Total bytes on domain 0 + 434341 <- Total bytes on domain 1 + +4. Since the task continues to migrate between domains, it may eventually + return to domain 0. + + $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes + 17592178699059 <- Overflow on domain 0 + 3232332 <- Total bytes on domain 1 + +In this case, the RMID on domain 0 transitions from "Unavailable" state to +active state. The hardware sets MSR_IA32_QM_CTR.Unavailable (bit 62) when +the counter is read and begins tracking the RMID counting from 0. + +Subsequent reads succeed but return a value smaller than the previously +saved MSR value (7345357). 
Consequently, resctrl's overflow logic is
+triggered: it compares the previous value (7345357) with the new, smaller
+value and incorrectly interprets this as a counter overflow, adding a large
+delta.
+
+In reality, this is a false positive: the counter did not overflow but was
+simply reset when the RMID transitioned from "Unavailable" back to active
+state.
+
+Here is the relevant text from the APM [1], available from [2]:
+
+"In PQOS Version 2.0 or higher, the MBM hardware will set the U bit on the
+first QM_CTR read when it begins tracking an RMID that it was not
+previously tracking. The U bit will be zero for all subsequent reads from
+that RMID while it is still tracked by the hardware. Therefore, a QM_CTR
+read with the U bit set when that RMID is in use by a processor can be
+considered 0 when calculating the difference with a subsequent read."
+
+[1] AMD64 Architecture Programmer's Manual Volume 2: System Programming
+ Publication # 24593 Revision 3.41 section 19.3.3 Monitoring L3 Memory
+ Bandwidth (MBM).
+
+ [ bp: Split commit message into smaller paragraph chunks for better
+ consumption. ]
+
+Fixes: 4d05bf71f157d ("x86/resctrl: Introduce AMD QOS feature")
+Signed-off-by: Babu Moger
+Signed-off-by: Borislav Petkov (AMD)
+Reviewed-by: Reinette Chatre
+Tested-by: Reinette Chatre
+Cc: stable@vger.kernel.org # needs adjustments for <= v6.17
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2]
+Signed-off-by: Sasha Levin
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kernel/cpu/resctrl/monitor.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kernel/cpu/resctrl/monitor.c
++++ b/arch/x86/kernel/cpu/resctrl/monitor.c
+@@ -337,7 +337,9 @@ int resctrl_arch_rmid_read(struct rdt_re
+ u32 unused, u32 rmid, enum resctrl_event_id eventid,
+ u64 *val, void *ignored)
+ {
++ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
+ int cpu = cpumask_any(&d->hdr.cpu_mask);
++ struct arch_mbm_state *am;
+ u64 msr_val;
+ u32 prmid;
+ int ret;
+@@ -346,12 +348,16 @@ int resctrl_arch_rmid_read(struct rdt_re
+
+ prmid = logical_rmid_to_physical_rmid(cpu, rmid);
+ ret = __rmid_read_phys(prmid, eventid, &msr_val);
+- if (ret)
+- return ret;
+
+- *val = get_corrected_val(r, d, rmid, eventid, msr_val);
++ if (!ret) {
++ *val = get_corrected_val(r, d, rmid, eventid, msr_val);
++ } else if (ret == -EINVAL) {
++ am = get_arch_mbm_state(hw_dom, rmid, eventid);
++ if (am)
++ am->prev_msr = 0;
++ }
+
+- return 0;
++ return ret;
+ }
+
+ static void limbo_release_entry(struct rmid_entry *entry)
diff --git a/queue-6.12/x86-resctrl-refactor-resctrl_arch_rmid_read.patch b/queue-6.12/x86-resctrl-refactor-resctrl_arch_rmid_read.patch
new file mode 100644
index 0000000000..ab0f5656ca
--- /dev/null
+++ b/queue-6.12/x86-resctrl-refactor-resctrl_arch_rmid_read.patch
@@ -0,0 +1,89 @@
+From stable+bounces-188199-greg=kroah.com@vger.kernel.org Mon Oct 20 18:40:59 2025
+From: Sasha Levin
+Date: Mon, 20 Oct 2025 12:38:52 -0400
+Subject: x86/resctrl: Refactor resctrl_arch_rmid_read()
+To: stable@vger.kernel.org
+Cc: Babu Moger , "Borislav Petkov (AMD)" , Reinette Chatre , Sasha Levin
+Message-ID: <20251020163853.1841192-1-sashal@kernel.org>
+
+From: Babu Moger
+
+[ Upstream commit 7c9ac605e202c4668e441fc8146a993577131ca1 ]
+
+resctrl_arch_rmid_read() adjusts the value obtained from MSR_IA32_QM_CTR to
+account for counter overflow for MBM events and to apply counter scaling for
+all the events. This logic is common to both reading an RMID and reading a
+hardware counter directly.
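+
+As a rough, hypothetical userspace sketch of the overflow correction
+being factored out (the 24-bit width is only an example; the real
+counter width is hardware-dependent), the wrap-safe delta computed by
+mbm_overflow_count() can be expressed as:
+
+	#include <stdint.h>
+	#include <stdio.h>
+
+	/* Shift stale high bits away so a wrapped counter still yields
+	 * a small positive delta instead of a huge bogus one. */
+	static uint64_t mbm_delta(uint64_t prev, uint64_t cur,
+				  unsigned int width)
+	{
+		unsigned int shift = 64 - width;
+
+		return ((cur << shift) - (prev << shift)) >> shift;
+	}
+
+	int main(void)
+	{
+		/* A 24-bit counter that wrapped from near-max to 0x10. */
+		printf("%llu\n", (unsigned long long)
+		       mbm_delta(0xFFFFF0, 0x10, 24)); /* prints 32 */
+		return 0;
+	}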
+
+Refactor the hardware value adjustment logic into get_corrected_val() to
+prepare for support of reading a hardware counter.
+
+Signed-off-by: Babu Moger
+Signed-off-by: Borislav Petkov (AMD)
+Reviewed-by: Reinette Chatre
+Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com
+Stable-dep-of: 15292f1b4c55 ("x86/resctrl: Fix miscount of bandwidth event when reactivating previously unavailable RMID")
+Signed-off-by: Sasha Levin
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kernel/cpu/resctrl/monitor.c | 38 ++++++++++++++++++++--------------
+ 1 file changed, 23 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/kernel/cpu/resctrl/monitor.c
++++ b/arch/x86/kernel/cpu/resctrl/monitor.c
+@@ -312,24 +312,13 @@ static u64 mbm_overflow_count(u64 prev_m
+ return chunks >> shift;
+ }
+
+-int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
+- u32 unused, u32 rmid, enum resctrl_event_id eventid,
+- u64 *val, void *ignored)
++static u64 get_corrected_val(struct rdt_resource *r, struct rdt_mon_domain *d,
++ u32 rmid, enum resctrl_event_id eventid, u64 msr_val)
+ {
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+- int cpu = cpumask_any(&d->hdr.cpu_mask);
+ struct arch_mbm_state *am;
+- u64 msr_val, chunks;
+- u32 prmid;
+- int ret;
+-
+- resctrl_arch_rmid_read_context_check();
+-
+- prmid = logical_rmid_to_physical_rmid(cpu, rmid);
+- ret = __rmid_read_phys(prmid, eventid, &msr_val);
+- if (ret)
+- return ret;
++ u64 chunks;
+
+ am = get_arch_mbm_state(hw_dom, rmid, eventid);
+ if (am) {
+@@ -341,7 +330,26 @@ int resctrl_arch_rmid_read(struct rdt_re
+ chunks = msr_val;
+ }
+
+- *val = chunks * hw_res->mon_scale;
++ return chunks * hw_res->mon_scale;
++}
++
++int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
++ u32 unused, u32 rmid, enum resctrl_event_id eventid,
++ u64 *val, void *ignored)
++{
++ int cpu = cpumask_any(&d->hdr.cpu_mask);
++ u64 msr_val;
++ u32 prmid;
++ int ret;
++
++ resctrl_arch_rmid_read_context_check();
++
++ prmid = logical_rmid_to_physical_rmid(cpu, rmid);
++ ret = __rmid_read_phys(prmid, eventid, &msr_val);
++ if (ret)
++ return ret;
++
++ *val = get_corrected_val(r, d, rmid, eventid, msr_val);
+
+ return 0;
+ }
diff --git a/queue-6.12/xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch b/queue-6.12/xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch
new file mode 100644
index 0000000000..75119190b9
--- /dev/null
+++ b/queue-6.12/xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch
@@ -0,0 +1,172 @@
+From stable+bounces-188052-greg=kroah.com@vger.kernel.org Mon Oct 20 14:46:47 2025
+From: Sasha Levin
+Date: Mon, 20 Oct 2025 08:43:58 -0400
+Subject: xfs: fix log CRC mismatches between i386 and other architectures
+To: stable@vger.kernel.org
+Cc: Christoph Hellwig , Carlos Maiolino , Sasha Levin
+Message-ID: <20251020124358.1756227-2-sashal@kernel.org>
+
+From: Christoph Hellwig
+
+[ Upstream commit e747883c7d7306acb4d683038d881528fbfbe749 ]
+
+When mounting file systems with a log that was dirtied on i386 on
+other architectures or vice versa, log recovery is unhappy:
+
+[ 11.068052] XFS (vdb): Torn write (CRC failure) detected at log block 0x2. Truncating head block from 0xc.
+
+This is because the CRCs generated by i386 and other architectures
+always differ.
The reason for that is that sizeof(struct xlog_rec_header)
+returns different values for i386 vs the rest (324 vs 328), because the
+struct is not sizeof(uint64_t) aligned, and i386 has odd struct size
+alignment rules.
+
+This issue goes back to commit 13cdc853c519 ("Add log versioning, and new
+super block field for the log stripe") in the xfs-import tree, which
+adds log v2 support and the h_size field that causes the unaligned size.
+At that time it only mattered for the crude debug-only log header
+checksum, but with commit 0e446be44806 ("xfs: add CRC checks to the log")
+it became a real issue for v5 file systems, because now there is a proper
+CRC, and regular builds actually expect it to match.
+
+Fix this by allowing checksums with and without the padding.
+
+Fixes: 0e446be44806 ("xfs: add CRC checks to the log")
+Cc: # v3.8
+Signed-off-by: Christoph Hellwig
+Signed-off-by: Carlos Maiolino
+Signed-off-by: Sasha Levin
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/xfs/libxfs/xfs_log_format.h | 30 +++++++++++++++++++++++++++++-
+ fs/xfs/libxfs/xfs_ondisk.h | 2 ++
+ fs/xfs/xfs_log.c | 8 ++++----
+ fs/xfs/xfs_log_priv.h | 4 ++--
+ fs/xfs/xfs_log_recover.c | 19 +++++++++++++++++--
+ 5 files changed, 54 insertions(+), 9 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_log_format.h
++++ b/fs/xfs/libxfs/xfs_log_format.h
+@@ -174,12 +174,40 @@ typedef struct xlog_rec_header {
+ __be32 h_prev_block; /* block number to previous LR : 4 */
+ __be32 h_num_logops; /* number of log operations in this LR : 4 */
+ __be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
+- /* new fields */
++
++ /* fields added by the Linux port: */
+ __be32 h_fmt; /* format of log record : 4 */
+ uuid_t h_fs_uuid; /* uuid of FS : 16 */
++
++ /* fields added for log v2: */
+ __be32 h_size; /* iclog size : 4 */
++
++ /*
++ * When h_size was added for log v2 support, it caused the structure to
++ * have a different size on i386 vs all other architectures because the
++ * sum of the sizes of the members is not aligned by that of the largest
++ * __be64-sized member, and i386 has really odd struct alignment rules.
++ *
++ * Due to the way the log headers are placed out on-disk that alone is
++ * not a problem because the xlog_rec_header always sits alone in a
++ * BBSIZE-sized area, and the rest of that area is padded with zeroes.
++ * But xlog_cksum used to calculate the checksum based on the structure
++ * size, and thus gives different checksums for i386 vs the rest.
++ * We now do two checksum validation passes for both sizes to allow
++ * moving v5 file systems with unclean logs between i386 and other
++ * (little-endian) architectures.
++ */
++ __u32 h_pad0;
+ } xlog_rec_header_t;
+
++#ifdef __i386__
++#define XLOG_REC_SIZE offsetofend(struct xlog_rec_header, h_size)
++#define XLOG_REC_SIZE_OTHER sizeof(struct xlog_rec_header)
++#else
++#define XLOG_REC_SIZE sizeof(struct xlog_rec_header)
++#define XLOG_REC_SIZE_OTHER offsetofend(struct xlog_rec_header, h_size)
++#endif /* __i386__ */
++
+ typedef struct xlog_rec_ext_header {
+ __be32 xh_cycle; /* write cycle of log : 4 */
+ __be32 xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* : 256 */
+--- a/fs/xfs/libxfs/xfs_ondisk.h
++++ b/fs/xfs/libxfs/xfs_ondisk.h
+@@ -149,6 +149,8 @@ xfs_check_ondisk_structs(void)
+ XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16);
++ XFS_CHECK_STRUCT_SIZE(struct xlog_rec_header, 328);
++ XFS_CHECK_STRUCT_SIZE(struct xlog_rec_ext_header, 260);
+
+ XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16);
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -1567,13 +1567,13 @@ xlog_cksum(
+ struct xlog *log,
+ struct xlog_rec_header *rhead,
+ char *dp,
+- int size)
++ unsigned int hdrsize,
++ unsigned int size)
+ {
+ uint32_t crc;
+
+ /* first generate the crc for the record header ... */
+- crc = xfs_start_cksum_update((char *)rhead,
+- sizeof(struct xlog_rec_header),
++ crc = xfs_start_cksum_update((char *)rhead, hdrsize,
+ offsetof(struct xlog_rec_header, h_crc));
+
+ /* ... then for additional cycle data for v2 logs ... */
+@@ -1837,7 +1837,7 @@ xlog_sync(
+
+ /* calculcate the checksum */
+ iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
+- iclog->ic_datap, size);
++ iclog->ic_datap, XLOG_REC_SIZE, size);
+ /*
+ * Intentionally corrupt the log record CRC based on the error injection
+ * frequency, if defined. This facilitates testing log recovery in the
+--- a/fs/xfs/xfs_log_priv.h
++++ b/fs/xfs/xfs_log_priv.h
+@@ -498,8 +498,8 @@ xlog_recover_finish(
+ extern void
+ xlog_recover_cancel(struct xlog *);
+
+-extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
+- char *dp, int size);
++__le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
++ char *dp, unsigned int hdrsize, unsigned int size);
+
+ extern struct kmem_cache *xfs_log_ticket_cache;
+ struct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes,
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -2890,9 +2890,24 @@ xlog_recover_process(
+ int pass,
+ struct list_head *buffer_list)
+ {
+- __le32 expected_crc = rhead->h_crc, crc;
++ __le32 expected_crc = rhead->h_crc, crc, other_crc;
+
+- crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
++ crc = xlog_cksum(log, rhead, dp, XLOG_REC_SIZE,
++ be32_to_cpu(rhead->h_len));
++
++ /*
++ * Look at the end of the struct xlog_rec_header definition in
++ * xfs_log_format.h for the gory details.
++ */
++ if (expected_crc && crc != expected_crc) {
++ other_crc = xlog_cksum(log, rhead, dp, XLOG_REC_SIZE_OTHER,
++ be32_to_cpu(rhead->h_len));
++ if (other_crc == expected_crc) {
++ xfs_notice_once(log->l_mp,
++ "Fixing up incorrect CRC due to padding.");
++ crc = other_crc;
++ }
++ }
+
+ /*
+ * Nothing else to do if this is a CRC verification pass.
Just return
diff --git a/queue-6.12/xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch b/queue-6.12/xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch
new file mode 100644
index 0000000000..2607728051
--- /dev/null
+++ b/queue-6.12/xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch
@@ -0,0 +1,68 @@
+From stable+bounces-188051-greg=kroah.com@vger.kernel.org Mon Oct 20 14:44:07 2025
+From: Sasha Levin
+Date: Mon, 20 Oct 2025 08:43:57 -0400
+Subject: xfs: rename the old_crc variable in xlog_recover_process
+To: stable@vger.kernel.org
+Cc: Christoph Hellwig , "Darrick J. Wong" , Carlos Maiolino , Sasha Levin
+Message-ID: <20251020124358.1756227-1-sashal@kernel.org>
+
+From: Christoph Hellwig
+
+[ Upstream commit 0b737f4ac1d3ec093347241df74bbf5f54a7e16c ]
+
+old_crc is a very misleading name. Rename it to expected_crc as that
+describes the usage much better.
+
+Signed-off-by: Christoph Hellwig
+Reviewed-by: Darrick J. Wong
+Signed-off-by: Carlos Maiolino
+Stable-dep-of: e747883c7d73 ("xfs: fix log CRC mismatches between i386 and other architectures")
+Signed-off-by: Sasha Levin
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/xfs/xfs_log_recover.c | 17 ++++++++---------
+ 1 file changed, 8 insertions(+), 9 deletions(-)
+
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -2890,20 +2890,19 @@ xlog_recover_process(
+ int pass,
+ struct list_head *buffer_list)
+ {
+- __le32 old_crc = rhead->h_crc;
+- __le32 crc;
++ __le32 expected_crc = rhead->h_crc, crc;
+
+ crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
+
+ /*
+ * Nothing else to do if this is a CRC verification pass. Just return
+ * if this a record with a non-zero crc. Unfortunately, mkfs always
+- * sets old_crc to 0 so we must consider this valid even on v5 supers.
+- * Otherwise, return EFSBADCRC on failure so the callers up the stack
+- * know precisely what failed.
++ * sets expected_crc to 0 so we must consider this valid even on v5
++ * supers. Otherwise, return EFSBADCRC on failure so the callers up the
++ * stack know precisely what failed.
+ */
+ if (pass == XLOG_RECOVER_CRCPASS) {
+- if (old_crc && crc != old_crc)
++ if (expected_crc && crc != expected_crc)
+ return -EFSBADCRC;
+ return 0;
+ }
+@@ -2914,11 +2913,11 @@ xlog_recover_process(
+ * zero CRC check prevents warnings from being emitted when upgrading
+ * the kernel from one that does not add CRCs by default.
+ */
+- if (crc != old_crc) {
+- if (old_crc || xfs_has_crc(log->l_mp)) {
++ if (crc != expected_crc) {
++ if (expected_crc || xfs_has_crc(log->l_mp)) {
+ xfs_alert(log->l_mp,
+ "log record CRC mismatch: found 0x%x, expected 0x%x.",
+- le32_to_cpu(old_crc),
++ le32_to_cpu(expected_crc),
+ le32_to_cpu(crc));
+ xfs_hex_dump(dp, 32);
+ }
diff --git a/queue-6.12/xfs-use-deferred-intent-items-for-reaping-crosslinked-blocks.patch b/queue-6.12/xfs-use-deferred-intent-items-for-reaping-crosslinked-blocks.patch
new file mode 100644
index 0000000000..e52c520b53
--- /dev/null
+++ b/queue-6.12/xfs-use-deferred-intent-items-for-reaping-crosslinked-blocks.patch
@@ -0,0 +1,55 @@
+From stable+bounces-188157-greg=kroah.com@vger.kernel.org Mon Oct 20 17:49:59 2025
+From: Sasha Levin
+Date: Mon, 20 Oct 2025 11:49:51 -0400
+Subject: xfs: use deferred intent items for reaping crosslinked blocks
+To: stable@vger.kernel.org
+Cc: "Darrick J. Wong" , Christoph Hellwig , Sasha Levin
+Message-ID: <20251020154951.1825215-1-sashal@kernel.org>
+
+From: "Darrick J.
Wong" + +[ Upstream commit cd32a0c0dcdf634f2e0e71f41c272e19dece6264 ] + +When we're removing rmap records for crosslinked blocks, use deferred +intent items so that we can try to free/unmap as many of the old data +structure's blocks as we can in the same transaction as the commit. + +Cc: # v6.6 +Fixes: 1c7ce115e52106 ("xfs: reap large AG metadata extents when possible") +Signed-off-by: "Darrick J. Wong" +Reviewed-by: Christoph Hellwig +[ adapted xfs_refcount_free_cow_extent() and xfs_rmap_free_extent() ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/scrub/reap.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/fs/xfs/scrub/reap.c ++++ b/fs/xfs/scrub/reap.c +@@ -409,8 +409,6 @@ xreap_agextent_iter( + if (crosslinked) { + trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp); + +- rs->force_roll = true; +- + if (rs->oinfo == &XFS_RMAP_OINFO_COW) { + /* + * If we're unmapping CoW staging extents, remove the +@@ -418,11 +416,14 @@ xreap_agextent_iter( + * rmap record as well. + */ + xfs_refcount_free_cow_extent(sc->tp, fsbno, *aglenp); ++ rs->force_roll = true; + return 0; + } + +- return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, +- *aglenp, rs->oinfo); ++ xfs_rmap_free_extent(sc->tp, sc->sa.pag->pag_agno, agbno, ++ *aglenp, rs->oinfo->oi_owner); ++ rs->deferred++; ++ return 0; + } + + trace_xreap_dispose_free_extent(sc->sa.pag, agbno, *aglenp); -- 2.47.3