6.12-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 21 Oct 2025 18:01:08 +0000 (20:01 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 21 Oct 2025 18:01:08 +0000 (20:01 +0200)
diff --git a/queue-6.12/d_alloc_parallel-set-dcache_par_lookup-earlier.patch b/queue-6.12/d_alloc_parallel-set-dcache_par_lookup-earlier.patch

new file mode 100644 (file)

index 0000000..5b7cc71
--- /dev/null
+++ b/queue-6.12/d_alloc_parallel-set-dcache_par_lookup-earlier.patch
@@ -0,0 +1,70 @@
+From stable+bounces-188211-greg=kroah.com@vger.kernel.org Mon Oct 20 19:29:08 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 13:28:58 -0400
+Subject: d_alloc_parallel(): set DCACHE_PAR_LOOKUP earlier
+To: stable@vger.kernel.org
+Cc: Al Viro <viro@zeniv.linux.org.uk>, Christian Brauner <brauner@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020172900.1851256-1-sashal@kernel.org>
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit e95db51c81f54dd12ea465b5127e4786f62a1095 ]
+
+Do that before new dentry is visible anywhere.  It does create
+a new possible state for dentries present in ->d_children/->d_sib -
+DCACHE_PAR_LOOKUP present, negative, unhashed, not in in-lookup
+hash chains, refcount positive.  Those are going to be skipped
+by all tree-walkers (both d_walk() callbacks in fs/dcache.c and
+explicit loops over children/sibling lists elsewhere) and
+dput() is fine with those.
+
+NOTE: dropping the final reference to a "normal" in-lookup dentry
+(in in-lookup hash) is a bug - somebody must've forgotten to
+call d_lookup_done() on it and bad things will happen.  With those
+it's OK; if/when we get around to making __dentry_kill() complain
+about such breakage, remember that predicate to check should
+*not* be just d_in_lookup(victim) but rather a combination of that
+with !hlist_bl_unhashed(&victim->d_u.d_in_lookup_hash).  Might
+be worth considering later...
+
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Stable-dep-of: 56094ad3eaa2 ("vfs: Don't leak disconnected dentries on umount")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/dcache.c |   10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -2475,13 +2475,19 @@ struct dentry *d_alloc_parallel(struct d
+       unsigned int hash = name->hash;
+       struct hlist_bl_head *b = in_lookup_hash(parent, hash);
+       struct hlist_bl_node *node;
+-      struct dentry *new = d_alloc(parent, name);
++      struct dentry *new = __d_alloc(parent->d_sb, name);
+       struct dentry *dentry;
+       unsigned seq, r_seq, d_seq;
+ 
+       if (unlikely(!new))
+               return ERR_PTR(-ENOMEM);
+ 
++      new->d_flags |= DCACHE_PAR_LOOKUP;
++      spin_lock(&parent->d_lock);
++      new->d_parent = dget_dlock(parent);
++      hlist_add_head(&new->d_sib, &parent->d_children);
++      spin_unlock(&parent->d_lock);
++
+ retry:
+       rcu_read_lock();
+       seq = smp_load_acquire(&parent->d_inode->i_dir_seq);
+@@ -2565,8 +2571,6 @@ retry:
+               return dentry;
+       }
+       rcu_read_unlock();
+-      /* we can't take ->d_lock here; it's OK, though. */
+-      new->d_flags |= DCACHE_PAR_LOOKUP;
+       new->d_wait = wq;
+       hlist_bl_add_head(&new->d_u.d_in_lookup_hash, b);
+       hlist_bl_unlock(b);
diff --git a/queue-6.12/hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch b/queue-6.12/hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch

new file mode 100644 (file)

index 0000000..0660fa2
--- /dev/null
+++ b/queue-6.12/hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch
@@ -0,0 +1,223 @@
+From 42520df65bf67189541a425f7d36b0b3e7bd7844 Mon Sep 17 00:00:00 2001
+From: Viacheslav Dubeyko <slava@dubeyko.com>
+Date: Fri, 19 Sep 2025 12:12:44 -0700
+Subject: hfsplus: fix slab-out-of-bounds read in hfsplus_strcasecmp()
+
+From: Viacheslav Dubeyko <slava@dubeyko.com>
+
+commit 42520df65bf67189541a425f7d36b0b3e7bd7844 upstream.
+
+The hfsplus_strcasecmp() logic can trigger the issue:
+
+[  117.317703][ T9855] ==================================================================
+[  117.318353][ T9855] BUG: KASAN: slab-out-of-bounds in hfsplus_strcasecmp+0x1bc/0x490
+[  117.318991][ T9855] Read of size 2 at addr ffff88802160f40c by task repro/9855
+[  117.319577][ T9855]
+[  117.319773][ T9855] CPU: 0 UID: 0 PID: 9855 Comm: repro Not tainted 6.17.0-rc6 #33 PREEMPT(full)
+[  117.319780][ T9855] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
+[  117.319783][ T9855] Call Trace:
+[  117.319785][ T9855]  <TASK>
+[  117.319788][ T9855]  dump_stack_lvl+0x1c1/0x2a0
+[  117.319795][ T9855]  ? __virt_addr_valid+0x1c8/0x5c0
+[  117.319803][ T9855]  ? __pfx_dump_stack_lvl+0x10/0x10
+[  117.319808][ T9855]  ? rcu_is_watching+0x15/0xb0
+[  117.319816][ T9855]  ? lock_release+0x4b/0x3e0
+[  117.319821][ T9855]  ? __kasan_check_byte+0x12/0x40
+[  117.319828][ T9855]  ? __virt_addr_valid+0x1c8/0x5c0
+[  117.319835][ T9855]  ? __virt_addr_valid+0x4a5/0x5c0
+[  117.319842][ T9855]  print_report+0x17e/0x7e0
+[  117.319848][ T9855]  ? __virt_addr_valid+0x1c8/0x5c0
+[  117.319855][ T9855]  ? __virt_addr_valid+0x4a5/0x5c0
+[  117.319862][ T9855]  ? __phys_addr+0xd3/0x180
+[  117.319869][ T9855]  ? hfsplus_strcasecmp+0x1bc/0x490
+[  117.319876][ T9855]  kasan_report+0x147/0x180
+[  117.319882][ T9855]  ? hfsplus_strcasecmp+0x1bc/0x490
+[  117.319891][ T9855]  hfsplus_strcasecmp+0x1bc/0x490
+[  117.319900][ T9855]  ? __pfx_hfsplus_cat_case_cmp_key+0x10/0x10
+[  117.319906][ T9855]  hfs_find_rec_by_key+0xa9/0x1e0
+[  117.319913][ T9855]  __hfsplus_brec_find+0x18e/0x470
+[  117.319920][ T9855]  ? __pfx_hfsplus_bnode_find+0x10/0x10
+[  117.319926][ T9855]  ? __pfx_hfs_find_rec_by_key+0x10/0x10
+[  117.319933][ T9855]  ? __pfx___hfsplus_brec_find+0x10/0x10
+[  117.319942][ T9855]  hfsplus_brec_find+0x28f/0x510
+[  117.319949][ T9855]  ? __pfx_hfs_find_rec_by_key+0x10/0x10
+[  117.319956][ T9855]  ? __pfx_hfsplus_brec_find+0x10/0x10
+[  117.319963][ T9855]  ? __kmalloc_noprof+0x2a9/0x510
+[  117.319969][ T9855]  ? hfsplus_find_init+0x8c/0x1d0
+[  117.319976][ T9855]  hfsplus_brec_read+0x2b/0x120
+[  117.319983][ T9855]  hfsplus_lookup+0x2aa/0x890
+[  117.319990][ T9855]  ? __pfx_hfsplus_lookup+0x10/0x10
+[  117.320003][ T9855]  ? d_alloc_parallel+0x2f0/0x15e0
+[  117.320008][ T9855]  ? __lock_acquire+0xaec/0xd80
+[  117.320013][ T9855]  ? __pfx_d_alloc_parallel+0x10/0x10
+[  117.320019][ T9855]  ? __raw_spin_lock_init+0x45/0x100
+[  117.320026][ T9855]  ? __init_waitqueue_head+0xa9/0x150
+[  117.320034][ T9855]  __lookup_slow+0x297/0x3d0
+[  117.320039][ T9855]  ? __pfx___lookup_slow+0x10/0x10
+[  117.320045][ T9855]  ? down_read+0x1ad/0x2e0
+[  117.320055][ T9855]  lookup_slow+0x53/0x70
+[  117.320065][ T9855]  walk_component+0x2f0/0x430
+[  117.320073][ T9855]  path_lookupat+0x169/0x440
+[  117.320081][ T9855]  filename_lookup+0x212/0x590
+[  117.320089][ T9855]  ? __pfx_filename_lookup+0x10/0x10
+[  117.320098][ T9855]  ? strncpy_from_user+0x150/0x290
+[  117.320105][ T9855]  ? getname_flags+0x1e5/0x540
+[  117.320112][ T9855]  user_path_at+0x3a/0x60
+[  117.320117][ T9855]  __x64_sys_umount+0xee/0x160
+[  117.320123][ T9855]  ? __pfx___x64_sys_umount+0x10/0x10
+[  117.320129][ T9855]  ? do_syscall_64+0xb7/0x3a0
+[  117.320135][ T9855]  ? entry_SYSCALL_64_after_hwframe+0x77/0x7f
+[  117.320141][ T9855]  ? entry_SYSCALL_64_after_hwframe+0x77/0x7f
+[  117.320145][ T9855]  do_syscall_64+0xf3/0x3a0
+[  117.320150][ T9855]  ? exc_page_fault+0x9f/0xf0
+[  117.320154][ T9855]  entry_SYSCALL_64_after_hwframe+0x77/0x7f
+[  117.320158][ T9855] RIP: 0033:0x7f7dd7908b07
+[  117.320163][ T9855] Code: 23 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 08
+[  117.320167][ T9855] RSP: 002b:00007ffd5ebd9698 EFLAGS: 00000202 ORIG_RAX: 00000000000000a6
+[  117.320172][ T9855] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f7dd7908b07
+[  117.320176][ T9855] RDX: 0000000000000009 RSI: 0000000000000009 RDI: 00007ffd5ebd9740
+[  117.320179][ T9855] RBP: 00007ffd5ebda780 R08: 0000000000000005 R09: 00007ffd5ebd9530
+[  117.320181][ T9855] R10: 00007f7dd799bfc0 R11: 0000000000000202 R12: 000055e2008b32d0
+[  117.320184][ T9855] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
+[  117.320189][ T9855]  </TASK>
+[  117.320190][ T9855]
+[  117.351311][ T9855] Allocated by task 9855:
+[  117.351683][ T9855]  kasan_save_track+0x3e/0x80
+[  117.352093][ T9855]  __kasan_kmalloc+0x8d/0xa0
+[  117.352490][ T9855]  __kmalloc_noprof+0x288/0x510
+[  117.352914][ T9855]  hfsplus_find_init+0x8c/0x1d0
+[  117.353342][ T9855]  hfsplus_lookup+0x19c/0x890
+[  117.353747][ T9855]  __lookup_slow+0x297/0x3d0
+[  117.354148][ T9855]  lookup_slow+0x53/0x70
+[  117.354514][ T9855]  walk_component+0x2f0/0x430
+[  117.354921][ T9855]  path_lookupat+0x169/0x440
+[  117.355325][ T9855]  filename_lookup+0x212/0x590
+[  117.355740][ T9855]  user_path_at+0x3a/0x60
+[  117.356115][ T9855]  __x64_sys_umount+0xee/0x160
+[  117.356529][ T9855]  do_syscall_64+0xf3/0x3a0
+[  117.356920][ T9855]  entry_SYSCALL_64_after_hwframe+0x77/0x7f
+[  117.357429][ T9855]
+[  117.357636][ T9855] The buggy address belongs to the object at ffff88802160f000
+[  117.357636][ T9855]  which belongs to the cache kmalloc-2k of size 2048
+[  117.358827][ T9855] The buggy address is located 0 bytes to the right of
+[  117.358827][ T9855]  allocated 1036-byte region [ffff88802160f000, ffff88802160f40c)
+[  117.360061][ T9855]
+[  117.360266][ T9855] The buggy address belongs to the physical page:
+[  117.360813][ T9855] page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x21608
+[  117.361562][ T9855] head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0
+[  117.362285][ T9855] flags: 0xfff00000000040(head|node=0|zone=1|lastcpupid=0x7ff)
+[  117.362929][ T9855] page_type: f5(slab)
+[  117.363282][ T9855] raw: 00fff00000000040 ffff88801a842f00 ffffea0000932000 dead000000000002
+[  117.364015][ T9855] raw: 0000000000000000 0000000080080008 00000000f5000000 0000000000000000
+[  117.364750][ T9855] head: 00fff00000000040 ffff88801a842f00 ffffea0000932000 dead000000000002
+[  117.365491][ T9855] head: 0000000000000000 0000000080080008 00000000f5000000 0000000000000000
+[  117.366232][ T9855] head: 00fff00000000003 ffffea0000858201 00000000ffffffff 00000000ffffffff
+[  117.366968][ T9855] head: ffffffffffffffff 0000000000000000 00000000ffffffff 0000000000000008
+[  117.367711][ T9855] page dumped because: kasan: bad access detected
+[  117.368259][ T9855] page_owner tracks the page as allocated
+[  117.368745][ T9855] page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN1
+[  117.370541][ T9855]  post_alloc_hook+0x240/0x2a0
+[  117.370954][ T9855]  get_page_from_freelist+0x2101/0x21e0
+[  117.371435][ T9855]  __alloc_frozen_pages_noprof+0x274/0x380
+[  117.371935][ T9855]  alloc_pages_mpol+0x241/0x4b0
+[  117.372360][ T9855]  allocate_slab+0x8d/0x380
+[  117.372752][ T9855]  ___slab_alloc+0xbe3/0x1400
+[  117.373159][ T9855]  __kmalloc_cache_noprof+0x296/0x3d0
+[  117.373621][ T9855]  nexthop_net_init+0x75/0x100
+[  117.374038][ T9855]  ops_init+0x35c/0x5c0
+[  117.374400][ T9855]  setup_net+0x10c/0x320
+[  117.374768][ T9855]  copy_net_ns+0x31b/0x4d0
+[  117.375156][ T9855]  create_new_namespaces+0x3f3/0x720
+[  117.375613][ T9855]  unshare_nsproxy_namespaces+0x11c/0x170
+[  117.376094][ T9855]  ksys_unshare+0x4ca/0x8d0
+[  117.376477][ T9855]  __x64_sys_unshare+0x38/0x50
+[  117.376879][ T9855]  do_syscall_64+0xf3/0x3a0
+[  117.377265][ T9855] page last free pid 9110 tgid 9110 stack trace:
+[  117.377795][ T9855]  __free_frozen_pages+0xbeb/0xd50
+[  117.378229][ T9855]  __put_partials+0x152/0x1a0
+[  117.378625][ T9855]  put_cpu_partial+0x17c/0x250
+[  117.379026][ T9855]  __slab_free+0x2d4/0x3c0
+[  117.379404][ T9855]  qlist_free_all+0x97/0x140
+[  117.379790][ T9855]  kasan_quarantine_reduce+0x148/0x160
+[  117.380250][ T9855]  __kasan_slab_alloc+0x22/0x80
+[  117.380662][ T9855]  __kmalloc_noprof+0x232/0x510
+[  117.381074][ T9855]  tomoyo_supervisor+0xc0a/0x1360
+[  117.381498][ T9855]  tomoyo_env_perm+0x149/0x1e0
+[  117.381903][ T9855]  tomoyo_find_next_domain+0x15ad/0x1b90
+[  117.382378][ T9855]  tomoyo_bprm_check_security+0x11c/0x180
+[  117.382859][ T9855]  security_bprm_check+0x89/0x280
+[  117.383289][ T9855]  bprm_execve+0x8f1/0x14a0
+[  117.383673][ T9855]  do_execveat_common+0x528/0x6b0
+[  117.384103][ T9855]  __x64_sys_execve+0x94/0xb0
+[  117.384500][ T9855]
+[  117.384706][ T9855] Memory state around the buggy address:
+[  117.385179][ T9855]  ffff88802160f300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[  117.385854][ T9855]  ffff88802160f380: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[  117.386534][ T9855] >ffff88802160f400: 00 04 fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[  117.387204][ T9855]                       ^
+[  117.387566][ T9855]  ffff88802160f480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[  117.388243][ T9855]  ffff88802160f500: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[  117.388918][ T9855] ==================================================================
+
+The issue occurs when the length field of struct hfsplus_unistr
+is bigger than HFSPLUS_MAX_STRLEN. The patch checks the lengths
+of the strings being compared and, if a length is bigger than
+HFSPLUS_MAX_STRLEN, corrects it to that value before the
+comparison starts.
+
+v2
+The string length correction has been added for hfsplus_strcmp().
+
+Reported-by: Jiaming Zhang <r772577952@gmail.com>
+Signed-off-by: Viacheslav Dubeyko <slava@dubeyko.com>
+cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
+cc: Yangtao Li <frank.li@vivo.com>
+cc: linux-fsdevel@vger.kernel.org
+cc: syzkaller@googlegroups.com
+Link: https://lore.kernel.org/r/20250919191243.1370388-1-slava@dubeyko.com
+Signed-off-by: Viacheslav Dubeyko <slava@dubeyko.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/hfsplus/unicode.c |   24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+--- a/fs/hfsplus/unicode.c
++++ b/fs/hfsplus/unicode.c
+@@ -40,6 +40,18 @@ int hfsplus_strcasecmp(const struct hfsp
+       p1 = s1->unicode;
+       p2 = s2->unicode;
+ 
++      if (len1 > HFSPLUS_MAX_STRLEN) {
++              len1 = HFSPLUS_MAX_STRLEN;
++              pr_err("invalid length %u has been corrected to %d\n",
++                      be16_to_cpu(s1->length), len1);
++      }
++
++      if (len2 > HFSPLUS_MAX_STRLEN) {
++              len2 = HFSPLUS_MAX_STRLEN;
++              pr_err("invalid length %u has been corrected to %d\n",
++                      be16_to_cpu(s2->length), len2);
++      }
++
+       while (1) {
+               c1 = c2 = 0;
+ 
+@@ -74,6 +86,18 @@ int hfsplus_strcmp(const struct hfsplus_
+       p1 = s1->unicode;
+       p2 = s2->unicode;
+ 
++      if (len1 > HFSPLUS_MAX_STRLEN) {
++              len1 = HFSPLUS_MAX_STRLEN;
++              pr_err("invalid length %u has been corrected to %d\n",
++                      be16_to_cpu(s1->length), len1);
++      }
++
++      if (len2 > HFSPLUS_MAX_STRLEN) {
++              len2 = HFSPLUS_MAX_STRLEN;
++              pr_err("invalid length %u has been corrected to %d\n",
++                      be16_to_cpu(s2->length), len2);
++      }
++
+       for (len = min(len1, len2); len > 0; len--) {
+               c1 = be16_to_cpu(*p1);
+               c2 = be16_to_cpu(*p2);
diff --git a/queue-6.12/iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch b/queue-6.12/iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch

new file mode 100644 (file)

index 0000000..b401a5d
--- /dev/null
+++ b/queue-6.12/iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch
@@ -0,0 +1,66 @@
+From stable+bounces-188108-greg=kroah.com@vger.kernel.org Mon Oct 20 15:08:22 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 09:08:10 -0400
+Subject: iio: imu: inv_icm42600: Avoid configuring if already pm_runtime suspended
+To: stable@vger.kernel.org
+Cc: Sean Nyekjaer <sean@geanix.com>, Stable@vger.kernel.org, Jonathan Cameron <Jonathan.Cameron@huawei.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020130810.1766634-1-sashal@kernel.org>
+
+From: Sean Nyekjaer <sean@geanix.com>
+
+[ Upstream commit 466f7a2fef2a4e426f809f79845a1ec1aeb558f4 ]
+
+As in suspend, skip the resume configuration steps if the device is already
+runtime-suspended. This avoids reconfiguring a device that is already
+in the correct low-power state and ensures that pm_runtime handles the
+power state transitions properly.
+
+Fixes: 31c24c1e93c3 ("iio: imu: inv_icm42600: add core of new inv_icm42600 driver")
+Signed-off-by: Sean Nyekjaer <sean@geanix.com>
+Link: https://patch.msgid.link/20250901-icm42pmreg-v3-3-ef1336246960@geanix.com
+Cc: <Stable@vger.kernel.org>
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+[ removed apex/wakeup variable declarations ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iio/imu/inv_icm42600/inv_icm42600_core.c |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
++++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
+@@ -787,17 +787,15 @@ EXPORT_SYMBOL_NS_GPL(inv_icm42600_core_p
+ static int inv_icm42600_suspend(struct device *dev)
+ {
+       struct inv_icm42600_state *st = dev_get_drvdata(dev);
+-      int ret;
++      int ret = 0;
+ 
+       mutex_lock(&st->lock);
+ 
+       st->suspended.gyro = st->conf.gyro.mode;
+       st->suspended.accel = st->conf.accel.mode;
+       st->suspended.temp = st->conf.temp_en;
+-      if (pm_runtime_suspended(dev)) {
+-              ret = 0;
++      if (pm_runtime_suspended(dev))
+               goto out_unlock;
+-      }
+ 
+       /* disable FIFO data streaming */
+       if (st->fifo.on) {
+@@ -829,10 +827,13 @@ static int inv_icm42600_resume(struct de
+       struct inv_icm42600_state *st = dev_get_drvdata(dev);
+       struct inv_icm42600_sensor_state *gyro_st = iio_priv(st->indio_gyro);
+       struct inv_icm42600_sensor_state *accel_st = iio_priv(st->indio_accel);
+-      int ret;
++      int ret = 0;
+ 
+       mutex_lock(&st->lock);
+ 
++      if (pm_runtime_suspended(dev))
++              goto out_unlock;
++
+       ret = inv_icm42600_enable_regulator_vddio(st);
+       if (ret)
+               goto out_unlock;
diff --git a/queue-6.12/iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch b/queue-6.12/iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch

new file mode 100644 (file)

index 0000000..b3e27b6
--- /dev/null
+++ b/queue-6.12/iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch
@@ -0,0 +1,86 @@
+From stable+bounces-188095-greg=kroah.com@vger.kernel.org Mon Oct 20 15:06:21 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 09:02:39 -0400
+Subject: iio: imu: inv_icm42600: Simplify pm_runtime setup
+To: stable@vger.kernel.org
+Cc: Sean Nyekjaer <sean@geanix.com>, Stable@vger.kernel.org, Jonathan Cameron <Jonathan.Cameron@huawei.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020130239.1763909-2-sashal@kernel.org>
+
+From: Sean Nyekjaer <sean@geanix.com>
+
+[ Upstream commit 0792c1984a45ccd7a296d6b8cb78088bc99a212e ]
+
+Rework the power management in inv_icm42600_core_probe() to use
+devm_pm_runtime_set_active_enabled(), which simplifies the runtime PM
+setup by handling activation and enabling in one step.
+Remove the separate inv_icm42600_disable_pm callback, as it's no longer
+needed with the devm-managed approach.
+Using devm_pm_runtime_enable() also fixes the missing disable of
+autosuspend.
+Update inv_icm42600_disable_vddio_reg() to only disable the regulator if
+the device is not suspended (i.e. not already powered down), preventing
+unbalanced disables.
+Also remove redundant error msg on regulator_disable(), the regulator
+framework already emits an error message when regulator_disable() fails.
+
+This simplifies the PM setup and avoids manipulating the usage counter
+unnecessarily.
+
+Fixes: 31c24c1e93c3 ("iio: imu: inv_icm42600: add core of new inv_icm42600 driver")
+Signed-off-by: Sean Nyekjaer <sean@geanix.com>
+Link: https://patch.msgid.link/20250901-icm42pmreg-v3-1-ef1336246960@geanix.com
+Cc: <Stable@vger.kernel.org>
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iio/imu/inv_icm42600/inv_icm42600_core.c |   24 ++++++-----------------
+ 1 file changed, 7 insertions(+), 17 deletions(-)
+
+--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
++++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
+@@ -667,20 +667,12 @@ static void inv_icm42600_disable_vdd_reg
+ static void inv_icm42600_disable_vddio_reg(void *_data)
+ {
+       struct inv_icm42600_state *st = _data;
+-      const struct device *dev = regmap_get_device(st->map);
+-      int ret;
+-
+-      ret = regulator_disable(st->vddio_supply);
+-      if (ret)
+-              dev_err(dev, "failed to disable vddio error %d\n", ret);
+-}
++      struct device *dev = regmap_get_device(st->map);
+ 
+-static void inv_icm42600_disable_pm(void *_data)
+-{
+-      struct device *dev = _data;
++      if (pm_runtime_status_suspended(dev))
++              return;
+ 
+-      pm_runtime_put_sync(dev);
+-      pm_runtime_disable(dev);
++      regulator_disable(st->vddio_supply);
+ }
+ 
+ int inv_icm42600_core_probe(struct regmap *regmap, int chip, int irq,
+@@ -777,16 +769,14 @@ int inv_icm42600_core_probe(struct regma
+               return ret;
+ 
+       /* setup runtime power management */
+-      ret = pm_runtime_set_active(dev);
++      ret = devm_pm_runtime_set_active_enabled(dev);
+       if (ret)
+               return ret;
+-      pm_runtime_get_noresume(dev);
+-      pm_runtime_enable(dev);
++
+       pm_runtime_set_autosuspend_delay(dev, INV_ICM42600_SUSPEND_DELAY_MS);
+       pm_runtime_use_autosuspend(dev);
+-      pm_runtime_put(dev);
+ 
+-      return devm_add_action_or_reset(dev, inv_icm42600_disable_pm, dev);
++      return ret;
+ }
+ EXPORT_SYMBOL_NS_GPL(inv_icm42600_core_probe, IIO_ICM42600);
+ 
diff --git a/queue-6.12/ipv4-adopt-dst_dev-skb_dst_dev-and-skb_dst_dev_net.patch b/queue-6.12/ipv4-adopt-dst_dev-skb_dst_dev-and-skb_dst_dev_net.patch

new file mode 100644 (file)

index 0000000..a3c66fd
--- /dev/null
+++ b/queue-6.12/ipv4-adopt-dst_dev-skb_dst_dev-and-skb_dst_dev_net.patch
@@ -0,0 +1,384 @@
+From stable+bounces-188150-greg=kroah.com@vger.kernel.org Mon Oct 20 17:47:07 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 11:44:05 -0400
+Subject: ipv4: adopt dst_dev, skb_dst_dev and skb_dst_dev_net[_rcu]
+To: stable@vger.kernel.org
+Cc: Eric Dumazet <edumazet@google.com>, Kuniyuki Iwashima <kuniyu@google.com>, Jakub Kicinski <kuba@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020154409.1823664-4-sashal@kernel.org>
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a74fc62eec155ca5a6da8ff3856f3dc87fe24558 ]
+
+Use the new helpers as a first step to deal with
+potential dst->dev races.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
+Link: https://patch.msgid.link/20250630121934.3399505-8-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 833d4313bc1e ("mptcp: reset blackhole on success with non-loopback ifaces")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_hashtables.h |    2 +-
+ include/net/ip.h              |   11 ++++++-----
+ include/net/route.h           |    2 +-
+ net/ipv4/icmp.c               |   24 +++++++++++++-----------
+ net/ipv4/igmp.c               |    2 +-
+ net/ipv4/ip_fragment.c        |    2 +-
+ net/ipv4/ip_output.c          |    6 +++---
+ net/ipv4/ip_vti.c             |    4 ++--
+ net/ipv4/netfilter.c          |    4 ++--
+ net/ipv4/route.c              |    8 ++++----
+ net/ipv4/tcp_fastopen.c       |    4 +++-
+ net/ipv4/tcp_ipv4.c           |    2 +-
+ net/ipv4/tcp_metrics.c        |    8 ++++----
+ net/ipv4/xfrm4_output.c       |    2 +-
+ 14 files changed, 43 insertions(+), 38 deletions(-)
+
+--- a/include/net/inet_hashtables.h
++++ b/include/net/inet_hashtables.h
+@@ -492,7 +492,7 @@ static inline struct sock *__inet_lookup
+                                            const int sdif,
+                                            bool *refcounted)
+ {
+-      struct net *net = dev_net_rcu(skb_dst(skb)->dev);
++      struct net *net = skb_dst_dev_net_rcu(skb);
+       const struct iphdr *iph = ip_hdr(skb);
+       struct sock *sk;
+ 
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -475,7 +475,7 @@ static inline unsigned int ip_dst_mtu_ma
+ 
+       rcu_read_lock();
+ 
+-      net = dev_net_rcu(dst->dev);
++      net = dev_net_rcu(dst_dev(dst));
+       if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
+           ip_mtu_locked(dst) ||
+           !forwarding) {
+@@ -489,7 +489,7 @@ static inline unsigned int ip_dst_mtu_ma
+       if (mtu)
+               goto out;
+ 
+-      mtu = READ_ONCE(dst->dev->mtu);
++      mtu = READ_ONCE(dst_dev(dst)->mtu);
+ 
+       if (unlikely(ip_mtu_locked(dst))) {
+               if (rt->rt_uses_gateway && mtu > 576)
+@@ -509,16 +509,17 @@ out:
+ static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
+                                         const struct sk_buff *skb)
+ {
++      const struct dst_entry *dst = skb_dst(skb);
+       unsigned int mtu;
+ 
+       if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) {
+               bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
+ 
+-              return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding);
++              return ip_dst_mtu_maybe_forward(dst, forwarding);
+       }
+ 
+-      mtu = min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU);
+-      return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu);
++      mtu = min(READ_ONCE(dst_dev(dst)->mtu), IP_MAX_MTU);
++      return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
+ }
+ 
+ struct dst_metrics *ip_fib_metrics_init(struct nlattr *fc_mx, int fc_mx_len,
+--- a/include/net/route.h
++++ b/include/net/route.h
+@@ -369,7 +369,7 @@ static inline int ip4_dst_hoplimit(const
+               const struct net *net;
+ 
+               rcu_read_lock();
+-              net = dev_net_rcu(dst->dev);
++              net = dev_net_rcu(dst_dev(dst));
+               hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
+               rcu_read_unlock();
+       }
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -311,18 +311,20 @@ static bool icmpv4_xrlim_allow(struct ne
+ {
+       struct dst_entry *dst = &rt->dst;
+       struct inet_peer *peer;
++      struct net_device *dev;
+       bool rc = true;
+ 
+       if (!apply_ratelimit)
+               return true;
+ 
+       /* No rate limit on loopback */
+-      if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
++      dev = dst_dev(dst);
++      if (dev && (dev->flags & IFF_LOOPBACK))
+               goto out;
+ 
+       rcu_read_lock();
+       peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr,
+-                             l3mdev_master_ifindex_rcu(dst->dev));
++                             l3mdev_master_ifindex_rcu(dev));
+       rc = inet_peer_xrlim_allow(peer,
+                                  READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
+       rcu_read_unlock();
+@@ -468,13 +470,13 @@ out_bh_enable:
+  */
+ static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb)
+ {
+-      struct net_device *route_lookup_dev = NULL;
++      struct net_device *dev = skb->dev;
++      const struct dst_entry *dst;
+ 
+-      if (skb->dev)
+-              route_lookup_dev = skb->dev;
+-      else if (skb_dst(skb))
+-              route_lookup_dev = skb_dst(skb)->dev;
+-      return route_lookup_dev;
++      if (dev)
++              return dev;
++      dst = skb_dst(skb);
++      return dst ? dst_dev(dst) : NULL;
+ }
+ 
+ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4,
+@@ -873,7 +875,7 @@ static enum skb_drop_reason icmp_unreach
+       struct net *net;
+       u32 info = 0;
+ 
+-      net = dev_net_rcu(skb_dst(skb)->dev);
++      net = skb_dst_dev_net_rcu(skb);
+ 
+       /*
+        *      Incomplete header ?
+@@ -1016,7 +1018,7 @@ static enum skb_drop_reason icmp_echo(st
+       struct icmp_bxm icmp_param;
+       struct net *net;
+ 
+-      net = dev_net_rcu(skb_dst(skb)->dev);
++      net = skb_dst_dev_net_rcu(skb);
+       /* should there be an ICMP stat for ignored echos? */
+       if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all))
+               return SKB_NOT_DROPPED_YET;
+@@ -1186,7 +1188,7 @@ static enum skb_drop_reason icmp_timesta
+       return SKB_NOT_DROPPED_YET;
+ 
+ out_err:
+-      __ICMP_INC_STATS(dev_net_rcu(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
++      __ICMP_INC_STATS(skb_dst_dev_net_rcu(skb), ICMP_MIB_INERRORS);
+       return SKB_DROP_REASON_PKT_TOO_SMALL;
+ }
+ 
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -424,7 +424,7 @@ static int igmpv3_sendpack(struct sk_buf
+ 
+       pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen);
+ 
+-      return ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
++      return ip_local_out(skb_dst_dev_net(skb), skb->sk, skb);
+ }
+ 
+ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -488,7 +488,7 @@ out_fail:
+ /* Process an incoming IP datagram fragment. */
+ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
+ {
+-      struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
++      struct net_device *dev = skb->dev ? : skb_dst_dev(skb);
+       int vif = l3mdev_master_ifindex_rcu(dev);
+       struct ipq *qp;
+ 
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -117,7 +117,7 @@ int __ip_local_out(struct net *net, stru
+       skb->protocol = htons(ETH_P_IP);
+ 
+       return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
+-                     net, sk, skb, NULL, skb_dst(skb)->dev,
++                     net, sk, skb, NULL, skb_dst_dev(skb),
+                      dst_output);
+ }
+ 
+@@ -200,7 +200,7 @@ static int ip_finish_output2(struct net
+ {
+       struct dst_entry *dst = skb_dst(skb);
+       struct rtable *rt = dst_rtable(dst);
+-      struct net_device *dev = dst->dev;
++      struct net_device *dev = dst_dev(dst);
+       unsigned int hh_len = LL_RESERVED_SPACE(dev);
+       struct neighbour *neigh;
+       bool is_v6gw = false;
+@@ -426,7 +426,7 @@ int ip_mc_output(struct net *net, struct
+ 
+ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
+-      struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
++      struct net_device *dev = skb_dst_dev(skb), *indev = skb->dev;
+ 
+       skb->dev = dev;
+       skb->protocol = htons(ETH_P_IP);
+--- a/net/ipv4/ip_vti.c
++++ b/net/ipv4/ip_vti.c
+@@ -229,7 +229,7 @@ static netdev_tx_t vti_xmit(struct sk_bu
+               goto tx_error_icmp;
+       }
+ 
+-      tdev = dst->dev;
++      tdev = dst_dev(dst);
+ 
+       if (tdev == dev) {
+               dst_release(dst);
+@@ -259,7 +259,7 @@ static netdev_tx_t vti_xmit(struct sk_bu
+ xmit:
+       skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
+       skb_dst_set(skb, dst);
+-      skb->dev = skb_dst(skb)->dev;
++      skb->dev = skb_dst_dev(skb);
+ 
+       err = dst_output(tunnel->net, skb->sk, skb);
+       if (net_xmit_eval(err) == 0)
+--- a/net/ipv4/netfilter.c
++++ b/net/ipv4/netfilter.c
+@@ -20,12 +20,12 @@
+ /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
+ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type)
+ {
++      struct net_device *dev = skb_dst_dev(skb);
+       const struct iphdr *iph = ip_hdr(skb);
+       struct rtable *rt;
+       struct flowi4 fl4 = {};
+       __be32 saddr = iph->saddr;
+       __u8 flags;
+-      struct net_device *dev = skb_dst(skb)->dev;
+       struct flow_keys flkeys;
+       unsigned int hh_len;
+ 
+@@ -74,7 +74,7 @@ int ip_route_me_harder(struct net *net,
+ #endif
+ 
+       /* Change in oif may mean change in hh_len. */
+-      hh_len = skb_dst(skb)->dev->hard_header_len;
++      hh_len = skb_dst_dev(skb)->hard_header_len;
+       if (skb_headroom(skb) < hh_len &&
+           pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
+                               0, GFP_ATOMIC))
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -413,7 +413,7 @@ static struct neighbour *ipv4_neigh_look
+                                          const void *daddr)
+ {
+       const struct rtable *rt = container_of(dst, struct rtable, dst);
+-      struct net_device *dev = dst->dev;
++      struct net_device *dev = dst_dev(dst);
+       struct neighbour *n;
+ 
+       rcu_read_lock();
+@@ -440,7 +440,7 @@ static struct neighbour *ipv4_neigh_look
+ static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
+ {
+       const struct rtable *rt = container_of(dst, struct rtable, dst);
+-      struct net_device *dev = dst->dev;
++      struct net_device *dev = dst_dev(dst);
+       const __be32 *pkey = daddr;
+ 
+       if (rt->rt_gw_family == AF_INET) {
+@@ -1025,7 +1025,7 @@ static void __ip_rt_update_pmtu(struct r
+               return;
+ 
+       rcu_read_lock();
+-      net = dev_net_rcu(dst->dev);
++      net = dev_net_rcu(dst_dev(dst));
+       if (mtu < net->ipv4.ip_rt_min_pmtu) {
+               lock = true;
+               mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu);
+@@ -1323,7 +1323,7 @@ static unsigned int ipv4_default_advmss(
+       struct net *net;
+ 
+       rcu_read_lock();
+-      net = dev_net_rcu(dst->dev);
++      net = dev_net_rcu(dst_dev(dst));
+       advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
+                                  net->ipv4.ip_rt_min_advmss);
+       rcu_read_unlock();
+--- a/net/ipv4/tcp_fastopen.c
++++ b/net/ipv4/tcp_fastopen.c
+@@ -558,6 +558,7 @@ bool tcp_fastopen_active_should_disable(
+ void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
+ {
+       struct tcp_sock *tp = tcp_sk(sk);
++      struct net_device *dev;
+       struct dst_entry *dst;
+       struct sk_buff *skb;
+ 
+@@ -575,7 +576,8 @@ void tcp_fastopen_active_disable_ofo_che
+       } else if (tp->syn_fastopen_ch &&
+                  atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) {
+               dst = sk_dst_get(sk);
+-              if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
++              dev = dst ? dst_dev(dst) : NULL;
++              if (!(dev && (dev->flags & IFF_LOOPBACK)))
+                       atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0);
+               dst_release(dst);
+       }
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -786,7 +786,7 @@ static void tcp_v4_send_reset(const stru
+       arg.iov[0].iov_base = (unsigned char *)&rep;
+       arg.iov[0].iov_len  = sizeof(rep.th);
+ 
+-      net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev);
++      net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
+ 
+       /* Invalid TCP option size or twice included auth */
+       if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, &aoh))
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -166,11 +166,11 @@ static struct tcp_metrics_block *tcpm_ne
+                                         unsigned int hash)
+ {
+       struct tcp_metrics_block *tm;
+-      struct net *net;
+       bool reclaim = false;
++      struct net *net;
+ 
+       spin_lock_bh(&tcp_metrics_lock);
+-      net = dev_net_rcu(dst->dev);
++      net = dev_net_rcu(dst_dev(dst));
+ 
+       /* While waiting for the spin-lock the cache might have been populated
+        * with this entry and so we have to check again.
+@@ -273,7 +273,7 @@ static struct tcp_metrics_block *__tcp_g
+               return NULL;
+       }
+ 
+-      net = dev_net_rcu(dst->dev);
++      net = dev_net_rcu(dst_dev(dst));
+       hash ^= net_hash_mix(net);
+       hash = hash_32(hash, tcp_metrics_hash_log);
+ 
+@@ -318,7 +318,7 @@ static struct tcp_metrics_block *tcp_get
+       else
+               return NULL;
+ 
+-      net = dev_net_rcu(dst->dev);
++      net = dev_net_rcu(dst_dev(dst));
+       hash ^= net_hash_mix(net);
+       hash = hash_32(hash, tcp_metrics_hash_log);
+ 
+--- a/net/ipv4/xfrm4_output.c
++++ b/net/ipv4/xfrm4_output.c
+@@ -31,7 +31,7 @@ static int __xfrm4_output(struct net *ne
+ int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
+       return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+-                          net, sk, skb, skb->dev, skb_dst(skb)->dev,
++                          net, sk, skb, skb->dev, skb_dst_dev(skb),
+                           __xfrm4_output,
+                           !(IPCB(skb)->flags & IPSKB_REROUTED));
+ }
diff --git a/queue-6.12/ixgbevf-add-support-for-intel-r-e610-device.patch b/queue-6.12/ixgbevf-add-support-for-intel-r-e610-device.patch

new file mode 100644 (file)

index 0000000..8642e7d
--- /dev/null
+++ b/queue-6.12/ixgbevf-add-support-for-intel-r-e610-device.patch
@@ -0,0 +1,171 @@
+From stable+bounces-188208-greg=kroah.com@vger.kernel.org Mon Oct 20 19:28:52 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 13:28:39 -0400
+Subject: ixgbevf: Add support for Intel(R) E610 device
+To: stable@vger.kernel.org
+Cc: Piotr Kwapulinski <piotr.kwapulinski@intel.com>, Przemek Kitszel <przemyslaw.kitszel@intel.com>, Simon Horman <horms@kernel.org>, Rafal Romanowski <rafal.romanowski@intel.com>, Tony Nguyen <anthony.l.nguyen@intel.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020172841.1850940-2-sashal@kernel.org>
+
+From: Piotr Kwapulinski <piotr.kwapulinski@intel.com>
+
+[ Upstream commit 4c44b450c69b676955c2790dcf467c1f969d80f1 ]
+
+Add support for Intel(R) E610 Series of network devices. The E610
+is based on X550 but adds firmware-managed link, enhanced security
+capabilities and support for updated server manageability.
+
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Signed-off-by: Piotr Kwapulinski <piotr.kwapulinski@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Stable-dep-of: a7075f501bd3 ("ixgbevf: fix mailbox API compatibility by negotiating supported features")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/ixgbevf/defines.h      |    5 ++++-
+ drivers/net/ethernet/intel/ixgbevf/ixgbevf.h      |    6 +++++-
+ drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |   12 ++++++++++--
+ drivers/net/ethernet/intel/ixgbevf/vf.c           |   12 +++++++++++-
+ drivers/net/ethernet/intel/ixgbevf/vf.h           |    4 +++-
+ 5 files changed, 33 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/intel/ixgbevf/defines.h
++++ b/drivers/net/ethernet/intel/ixgbevf/defines.h
+@@ -1,5 +1,5 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
+-/* Copyright(c) 1999 - 2018 Intel Corporation. */
++/* Copyright(c) 1999 - 2024 Intel Corporation. */
+ 
+ #ifndef _IXGBEVF_DEFINES_H_
+ #define _IXGBEVF_DEFINES_H_
+@@ -16,6 +16,9 @@
+ #define IXGBE_DEV_ID_X550_VF_HV               0x1564
+ #define IXGBE_DEV_ID_X550EM_X_VF_HV   0x15A9
+ 
++#define IXGBE_DEV_ID_E610_VF          0x57AD
++#define IXGBE_SUBDEV_ID_E610_VF_HV    0x00FF
++
+ #define IXGBE_VF_IRQ_CLEAR_MASK               7
+ #define IXGBE_VF_MAX_TX_QUEUES                8
+ #define IXGBE_VF_MAX_RX_QUEUES                8
+--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+@@ -1,5 +1,5 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
+-/* Copyright(c) 1999 - 2018 Intel Corporation. */
++/* Copyright(c) 1999 - 2024 Intel Corporation. */
+ 
+ #ifndef _IXGBEVF_H_
+ #define _IXGBEVF_H_
+@@ -418,6 +418,8 @@ enum ixgbevf_boards {
+       board_X550EM_x_vf,
+       board_X550EM_x_vf_hv,
+       board_x550em_a_vf,
++      board_e610_vf,
++      board_e610_vf_hv,
+ };
+ 
+ enum ixgbevf_xcast_modes {
+@@ -434,11 +436,13 @@ extern const struct ixgbevf_info ixgbevf
+ extern const struct ixgbe_mbx_operations ixgbevf_mbx_ops;
+ extern const struct ixgbe_mbx_operations ixgbevf_mbx_ops_legacy;
+ extern const struct ixgbevf_info ixgbevf_x550em_a_vf_info;
++extern const struct ixgbevf_info ixgbevf_e610_vf_info;
+ 
+ extern const struct ixgbevf_info ixgbevf_82599_vf_hv_info;
+ extern const struct ixgbevf_info ixgbevf_X540_vf_hv_info;
+ extern const struct ixgbevf_info ixgbevf_X550_vf_hv_info;
+ extern const struct ixgbevf_info ixgbevf_X550EM_x_vf_hv_info;
++extern const struct ixgbevf_info ixgbevf_e610_vf_hv_info;
+ extern const struct ixgbe_mbx_operations ixgbevf_hv_mbx_ops;
+ 
+ /* needed by ethtool.c */
+--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+@@ -1,5 +1,5 @@
+ // SPDX-License-Identifier: GPL-2.0
+-/* Copyright(c) 1999 - 2018 Intel Corporation. */
++/* Copyright(c) 1999 - 2024 Intel Corporation. */
+ 
+ /******************************************************************************
+  Copyright (c)2006 - 2007 Myricom, Inc. for some LRO specific code
+@@ -39,7 +39,7 @@ static const char ixgbevf_driver_string[
+       "Intel(R) 10 Gigabit PCI Express Virtual Function Network Driver";
+ 
+ static char ixgbevf_copyright[] =
+-      "Copyright (c) 2009 - 2018 Intel Corporation.";
++      "Copyright (c) 2009 - 2024 Intel Corporation.";
+ 
+ static const struct ixgbevf_info *ixgbevf_info_tbl[] = {
+       [board_82599_vf]        = &ixgbevf_82599_vf_info,
+@@ -51,6 +51,8 @@ static const struct ixgbevf_info *ixgbev
+       [board_X550EM_x_vf]     = &ixgbevf_X550EM_x_vf_info,
+       [board_X550EM_x_vf_hv]  = &ixgbevf_X550EM_x_vf_hv_info,
+       [board_x550em_a_vf]     = &ixgbevf_x550em_a_vf_info,
++      [board_e610_vf]         = &ixgbevf_e610_vf_info,
++      [board_e610_vf_hv]      = &ixgbevf_e610_vf_hv_info,
+ };
+ 
+ /* ixgbevf_pci_tbl - PCI Device ID Table
+@@ -71,6 +73,9 @@ static const struct pci_device_id ixgbev
+       {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_VF), board_X550EM_x_vf },
+       {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV), board_X550EM_x_vf_hv},
+       {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_VF), board_x550em_a_vf },
++      {PCI_VDEVICE_SUB(INTEL, IXGBE_DEV_ID_E610_VF, PCI_ANY_ID,
++                       IXGBE_SUBDEV_ID_E610_VF_HV), board_e610_vf_hv},
++      {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_E610_VF), board_e610_vf},
+       /* required last entry */
+       {0, }
+ };
+@@ -4693,6 +4698,9 @@ static int ixgbevf_probe(struct pci_dev
+       case ixgbe_mac_X540_vf:
+               dev_info(&pdev->dev, "Intel(R) X540 Virtual Function\n");
+               break;
++      case ixgbe_mac_e610_vf:
++              dev_info(&pdev->dev, "Intel(R) E610 Virtual Function\n");
++              break;
+       case ixgbe_mac_82599_vf:
+       default:
+               dev_info(&pdev->dev, "Intel(R) 82599 Virtual Function\n");
+--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
++++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
+@@ -1,5 +1,5 @@
+ // SPDX-License-Identifier: GPL-2.0
+-/* Copyright(c) 1999 - 2018 Intel Corporation. */
++/* Copyright(c) 1999 - 2024 Intel Corporation. */
+ 
+ #include "vf.h"
+ #include "ixgbevf.h"
+@@ -1076,3 +1076,13 @@ const struct ixgbevf_info ixgbevf_x550em
+       .mac = ixgbe_mac_x550em_a_vf,
+       .mac_ops = &ixgbevf_mac_ops,
+ };
++
++const struct ixgbevf_info ixgbevf_e610_vf_info = {
++      .mac                    = ixgbe_mac_e610_vf,
++      .mac_ops                = &ixgbevf_mac_ops,
++};
++
++const struct ixgbevf_info ixgbevf_e610_vf_hv_info = {
++      .mac            = ixgbe_mac_e610_vf,
++      .mac_ops        = &ixgbevf_hv_mac_ops,
++};
+--- a/drivers/net/ethernet/intel/ixgbevf/vf.h
++++ b/drivers/net/ethernet/intel/ixgbevf/vf.h
+@@ -1,5 +1,5 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
+-/* Copyright(c) 1999 - 2018 Intel Corporation. */
++/* Copyright(c) 1999 - 2024 Intel Corporation. */
+ 
+ #ifndef __IXGBE_VF_H__
+ #define __IXGBE_VF_H__
+@@ -54,6 +54,8 @@ enum ixgbe_mac_type {
+       ixgbe_mac_X550_vf,
+       ixgbe_mac_X550EM_x_vf,
+       ixgbe_mac_x550em_a_vf,
++      ixgbe_mac_e610,
++      ixgbe_mac_e610_vf,
+       ixgbe_num_macs
+ };
+ 
diff --git a/queue-6.12/ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch b/queue-6.12/ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch

new file mode 100644 (file)

index 0000000..12bbe6c
--- /dev/null
+++ b/queue-6.12/ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch
@@ -0,0 +1,306 @@
+From stable+bounces-188209-greg=kroah.com@vger.kernel.org Mon Oct 20 19:28:52 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 13:28:40 -0400
+Subject: ixgbevf: fix getting link speed data for E610 devices
+To: stable@vger.kernel.org
+Cc: Jedrzej Jagielski <jedrzej.jagielski@intel.com>, Andrzej Wilczynski <andrzejx.wilczynski@intel.com>, Przemek Kitszel <przemyslaw.kitszel@intel.com>, Aleksandr Loktionov <aleksandr.loktionov@intel.com>, Rafal Romanowski <rafal.romanowski@intel.com>, Jacob Keller <jacob.e.keller@intel.com>, Jakub Kicinski <kuba@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020172841.1850940-3-sashal@kernel.org>
+
+From: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
+
+[ Upstream commit 53f0eb62b4d23d40686f2dd51776b8220f2887bb ]
+
+E610 adapters no longer use the VFLINKS register to read PF's link
+speed and linkup state. As a result the VF driver cannot get the actual
+link state and incorrectly reports 10G, which is the default option.
+This leads to a situation where even 1G adapters print 10G as the actual
+link speed. The same happens when the PF driver sets a speed other than 10G.
+
+Add a new mailbox operation to let the VF driver request the PF driver
+to provide actual link data. Update the mailbox API to v1.6.
+
+Incorporate both ways of getting link status within the legacy
+ixgbe_check_mac_link_vf() function.
+
+Fixes: 4c44b450c69b ("ixgbevf: Add support for Intel(R) E610 device")
+Co-developed-by: Andrzej Wilczynski <andrzejx.wilczynski@intel.com>
+Signed-off-by: Andrzej Wilczynski <andrzejx.wilczynski@intel.com>
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-2-ef32a425b92a@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: a7075f501bd3 ("ixgbevf: fix mailbox API compatibility by negotiating supported features")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/ixgbevf/defines.h      |    1 
+ drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |    6 
+ drivers/net/ethernet/intel/ixgbevf/mbx.h          |    4 
+ drivers/net/ethernet/intel/ixgbevf/vf.c           |  137 +++++++++++++++++-----
+ 4 files changed, 116 insertions(+), 32 deletions(-)
+
+--- a/drivers/net/ethernet/intel/ixgbevf/defines.h
++++ b/drivers/net/ethernet/intel/ixgbevf/defines.h
+@@ -28,6 +28,7 @@
+ 
+ /* Link speed */
+ typedef u32 ixgbe_link_speed;
++#define IXGBE_LINK_SPEED_UNKNOWN      0
+ #define IXGBE_LINK_SPEED_1GB_FULL     0x0020
+ #define IXGBE_LINK_SPEED_10GB_FULL    0x0080
+ #define IXGBE_LINK_SPEED_100_FULL     0x0008
+--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+@@ -2278,6 +2278,7 @@ static void ixgbevf_negotiate_api(struct
+ {
+       struct ixgbe_hw *hw = &adapter->hw;
+       static const int api[] = {
++              ixgbe_mbox_api_16,
+               ixgbe_mbox_api_15,
+               ixgbe_mbox_api_14,
+               ixgbe_mbox_api_13,
+@@ -2297,7 +2298,8 @@ static void ixgbevf_negotiate_api(struct
+               idx++;
+       }
+ 
+-      if (hw->api_version >= ixgbe_mbox_api_15) {
++      /* Following is not supported by API 1.6, it is specific for 1.5 */
++      if (hw->api_version == ixgbe_mbox_api_15) {
+               hw->mbx.ops.init_params(hw);
+               memcpy(&hw->mbx.ops, &ixgbevf_mbx_ops,
+                      sizeof(struct ixgbe_mbx_operations));
+@@ -2654,6 +2656,7 @@ static void ixgbevf_set_num_queues(struc
+               case ixgbe_mbox_api_13:
+               case ixgbe_mbox_api_14:
+               case ixgbe_mbox_api_15:
++              case ixgbe_mbox_api_16:
+                       if (adapter->xdp_prog &&
+                           hw->mac.max_tx_queues == rss)
+                               rss = rss > 3 ? 2 : 1;
+@@ -4648,6 +4651,7 @@ static int ixgbevf_probe(struct pci_dev
+       case ixgbe_mbox_api_13:
+       case ixgbe_mbox_api_14:
+       case ixgbe_mbox_api_15:
++      case ixgbe_mbox_api_16:
+               netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE -
+                                 (ETH_HLEN + ETH_FCS_LEN);
+               break;
+--- a/drivers/net/ethernet/intel/ixgbevf/mbx.h
++++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h
+@@ -66,6 +66,7 @@ enum ixgbe_pfvf_api_rev {
+       ixgbe_mbox_api_13,      /* API version 1.3, linux/freebsd VF driver */
+       ixgbe_mbox_api_14,      /* API version 1.4, linux/freebsd VF driver */
+       ixgbe_mbox_api_15,      /* API version 1.5, linux/freebsd VF driver */
++      ixgbe_mbox_api_16,      /* API version 1.6, linux/freebsd VF driver */
+       /* This value should always be last */
+       ixgbe_mbox_api_unknown, /* indicates that API version is not known */
+ };
+@@ -102,6 +103,9 @@ enum ixgbe_pfvf_api_rev {
+ 
+ #define IXGBE_VF_GET_LINK_STATE 0x10 /* get vf link state */
+ 
++/* mailbox API, version 1.6 VF requests */
++#define IXGBE_VF_GET_PF_LINK_STATE    0x11 /* request PF to send link info */
++
+ /* length of permanent address message returned from PF */
+ #define IXGBE_VF_PERMADDR_MSG_LEN     4
+ /* word in permanent address message with the current multicast type */
+--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
++++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
+@@ -313,6 +313,7 @@ int ixgbevf_get_reta_locked(struct ixgbe
+        * is not supported for this device type.
+        */
+       switch (hw->api_version) {
++      case ixgbe_mbox_api_16:
+       case ixgbe_mbox_api_15:
+       case ixgbe_mbox_api_14:
+       case ixgbe_mbox_api_13:
+@@ -382,6 +383,7 @@ int ixgbevf_get_rss_key_locked(struct ix
+        * or if the operation is not supported for this device type.
+        */
+       switch (hw->api_version) {
++      case ixgbe_mbox_api_16:
+       case ixgbe_mbox_api_15:
+       case ixgbe_mbox_api_14:
+       case ixgbe_mbox_api_13:
+@@ -552,6 +554,7 @@ static s32 ixgbevf_update_xcast_mode(str
+       case ixgbe_mbox_api_13:
+       case ixgbe_mbox_api_14:
+       case ixgbe_mbox_api_15:
++      case ixgbe_mbox_api_16:
+               break;
+       default:
+               return -EOPNOTSUPP;
+@@ -625,6 +628,48 @@ static s32 ixgbevf_hv_get_link_state_vf(
+ }
+ 
+ /**
++ * ixgbevf_get_pf_link_state - Get PF's link status
++ * @hw: pointer to the HW structure
++ * @speed: link speed
++ * @link_up: indicate if link is up/down
++ *
++ * Ask PF to provide link_up state and speed of the link.
++ *
++ * Return: IXGBE_ERR_MBX in the case of mailbox error,
++ * -EOPNOTSUPP if the op is not supported or 0 on success.
++ */
++static int ixgbevf_get_pf_link_state(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
++                                   bool *link_up)
++{
++      u32 msgbuf[3] = {};
++      int err;
++
++      switch (hw->api_version) {
++      case ixgbe_mbox_api_16:
++              break;
++      default:
++              return -EOPNOTSUPP;
++      }
++
++      msgbuf[0] = IXGBE_VF_GET_PF_LINK_STATE;
++
++      err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
++                                       ARRAY_SIZE(msgbuf));
++      if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_FAILURE)) {
++              err = IXGBE_ERR_MBX;
++              *speed = IXGBE_LINK_SPEED_UNKNOWN;
++              /* No need to set @link_up to false as it will be done by
++               * ixgbe_check_mac_link_vf().
++               */
++      } else {
++              *speed = msgbuf[1];
++              *link_up = msgbuf[2];
++      }
++
++      return err;
++}
++
++/**
+  *  ixgbevf_set_vfta_vf - Set/Unset VLAN filter table address
+  *  @hw: pointer to the HW structure
+  *  @vlan: 12 bit VLAN ID
+@@ -659,6 +704,58 @@ mbx_err:
+ }
+ 
+ /**
++ * ixgbe_read_vflinks - Read VFLINKS register
++ * @hw: pointer to the HW structure
++ * @speed: link speed
++ * @link_up: indicate if link is up/down
++ *
++ * Get linkup status and link speed from the VFLINKS register.
++ */
++static void ixgbe_read_vflinks(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
++                             bool *link_up)
++{
++      u32 vflinks = IXGBE_READ_REG(hw, IXGBE_VFLINKS);
++
++      /* if link status is down no point in checking to see if PF is up */
++      if (!(vflinks & IXGBE_LINKS_UP)) {
++              *link_up = false;
++              return;
++      }
++
++      /* for SFP+ modules and DA cables on 82599 it can take up to 500usecs
++       * before the link status is correct
++       */
++      if (hw->mac.type == ixgbe_mac_82599_vf) {
++              for (int i = 0; i < 5; i++) {
++                      udelay(100);
++                      vflinks = IXGBE_READ_REG(hw, IXGBE_VFLINKS);
++
++                      if (!(vflinks & IXGBE_LINKS_UP)) {
++                              *link_up = false;
++                              return;
++                      }
++              }
++      }
++
++      /* We reached this point so there's link */
++      *link_up = true;
++
++      switch (vflinks & IXGBE_LINKS_SPEED_82599) {
++      case IXGBE_LINKS_SPEED_10G_82599:
++              *speed = IXGBE_LINK_SPEED_10GB_FULL;
++              break;
++      case IXGBE_LINKS_SPEED_1G_82599:
++              *speed = IXGBE_LINK_SPEED_1GB_FULL;
++              break;
++      case IXGBE_LINKS_SPEED_100_82599:
++              *speed = IXGBE_LINK_SPEED_100_FULL;
++              break;
++      default:
++              *speed = IXGBE_LINK_SPEED_UNKNOWN;
++      }
++}
++
++/**
+  * ixgbevf_hv_set_vfta_vf - * Hyper-V variant - just a stub.
+  * @hw: unused
+  * @vlan: unused
+@@ -705,7 +802,6 @@ static s32 ixgbevf_check_mac_link_vf(str
+       struct ixgbe_mbx_info *mbx = &hw->mbx;
+       struct ixgbe_mac_info *mac = &hw->mac;
+       s32 ret_val = 0;
+-      u32 links_reg;
+       u32 in_msg = 0;
+ 
+       /* If we were hit with a reset drop the link */
+@@ -715,36 +811,14 @@ static s32 ixgbevf_check_mac_link_vf(str
+       if (!mac->get_link_status)
+               goto out;
+ 
+-      /* if link status is down no point in checking to see if pf is up */
+-      links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS);
+-      if (!(links_reg & IXGBE_LINKS_UP))
+-              goto out;
+-
+-      /* for SFP+ modules and DA cables on 82599 it can take up to 500usecs
+-       * before the link status is correct
+-       */
+-      if (mac->type == ixgbe_mac_82599_vf) {
+-              int i;
+-
+-              for (i = 0; i < 5; i++) {
+-                      udelay(100);
+-                      links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS);
+-
+-                      if (!(links_reg & IXGBE_LINKS_UP))
+-                              goto out;
+-              }
+-      }
+-
+-      switch (links_reg & IXGBE_LINKS_SPEED_82599) {
+-      case IXGBE_LINKS_SPEED_10G_82599:
+-              *speed = IXGBE_LINK_SPEED_10GB_FULL;
+-              break;
+-      case IXGBE_LINKS_SPEED_1G_82599:
+-              *speed = IXGBE_LINK_SPEED_1GB_FULL;
+-              break;
+-      case IXGBE_LINKS_SPEED_100_82599:
+-              *speed = IXGBE_LINK_SPEED_100_FULL;
+-              break;
++      if (hw->mac.type == ixgbe_mac_e610_vf) {
++              ret_val = ixgbevf_get_pf_link_state(hw, speed, link_up);
++              if (ret_val)
++                      goto out;
++      } else {
++              ixgbe_read_vflinks(hw, speed, link_up);
++              if (*link_up == false)
++                      goto out;
+       }
+ 
+       /* if the read failed it could just be a mailbox collision, best wait
+@@ -951,6 +1025,7 @@ int ixgbevf_get_queues(struct ixgbe_hw *
+       case ixgbe_mbox_api_13:
+       case ixgbe_mbox_api_14:
+       case ixgbe_mbox_api_15:
++      case ixgbe_mbox_api_16:
+               break;
+       default:
+               return 0;
diff --git a/queue-6.12/ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch b/queue-6.12/ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch

new file mode 100644 (file)

index 0000000..0650c0f
--- /dev/null
+++ b/queue-6.12/ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch
@@ -0,0 +1,327 @@
+From stable+bounces-188210-greg=kroah.com@vger.kernel.org Mon Oct 20 19:28:53 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 13:28:41 -0400
+Subject: ixgbevf: fix mailbox API compatibility by negotiating supported features
+To: stable@vger.kernel.org
+Cc: Jedrzej Jagielski <jedrzej.jagielski@intel.com>, Jacob Keller <jacob.e.keller@intel.com>, Przemek Kitszel <przemyslaw.kitszel@intel.com>, Aleksandr Loktionov <aleksandr.loktionov@intel.com>, Rafal Romanowski <rafal.romanowski@intel.com>, Jakub Kicinski <kuba@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020172841.1850940-4-sashal@kernel.org>
+
+From: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
+
+[ Upstream commit a7075f501bd33c93570af759b6f4302ef0175168 ]
+
+The mailbox API used to be backward compatible. Various drivers from
+various OSes supporting 10G adapters from the Intel portfolio could
+easily negotiate the mailbox API.
+
+This convention has been broken since introducing API 1.4.
+Commit 0062e7cc955e ("ixgbevf: add VF IPsec offload code") added support
+for IPSec which is specific only for the kernel ixgbe driver. None of the
+rest of the Intel 10G PF/VF drivers supports it. And actually lack of
+support was not included in the IPSec implementation - there were no such
+code paths. No possibility to negotiate support for the feature was
+introduced along with introduction of the feature itself.
+
+Commit 339f28964147 ("ixgbevf: Add support for new mailbox communication
+between PF and VF"), which increased the API version to 1.5, did the same -
+it introduced code supported specifically by the PF ESX driver. It altered
+the API version for the VF driver while not touching the version defined
+for the PF ixgbe driver. This led to additional discrepancies, as the code
+provided within API 1.6 cannot be supported by the Linux ixgbe driver
+because it causes crashes.
+
+The issue was noticed some time ago and mitigated by Jake in commit
+d0725312adf5 ("ixgbevf: stop attempting IPSEC offload on Mailbox API 1.5").
+As a result we have a regression in IPsec support, and after increasing the
+API to version 1.6 the ixgbevf driver stopped supporting ESX MBX.
+
+To fix this mess, add a new mailbox op asking the PF driver about supported
+features. Based on the response, determine whether to set support for IPSec
+and the ESX-specific enhanced mailbox.
+
+The new mailbox op, for compatibility purposes, must be added within a new
+API revision, as the API version of OOT PF & VF drivers has already been
+increased to 1.6 and doesn't incorporate the features negotiate op.
+
+The features negotiation mechanism can be extended with new features when
+needed in the future.
+
+Reported-by: Jacob Keller <jacob.e.keller@intel.com>
+Closes: https://lore.kernel.org/intel-wired-lan/20241101-jk-ixgbevf-mailbox-v1-5-fixes-v1-0-f556dc9a66ed@intel.com/
+Fixes: 0062e7cc955e ("ixgbevf: add VF IPsec offload code")
+Fixes: 339f28964147 ("ixgbevf: Add support for new mailbox communication between PF and VF")
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-4-ef32a425b92a@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/ixgbevf/ipsec.c        |   10 ++++
+ drivers/net/ethernet/intel/ixgbevf/ixgbevf.h      |    7 +++
+ drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |   32 ++++++++++++++-
+ drivers/net/ethernet/intel/ixgbevf/mbx.h          |    4 +
+ drivers/net/ethernet/intel/ixgbevf/vf.c           |   45 +++++++++++++++++++++-
+ drivers/net/ethernet/intel/ixgbevf/vf.h           |    1 
+ 6 files changed, 96 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/intel/ixgbevf/ipsec.c
++++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c
+@@ -271,6 +271,9 @@ static int ixgbevf_ipsec_add_sa(struct x
+       adapter = netdev_priv(dev);
+       ipsec = adapter->ipsec;
+ 
++      if (!(adapter->pf_features & IXGBEVF_PF_SUP_IPSEC))
++              return -EOPNOTSUPP;
++
+       if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) {
+               NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol for IPsec offload");
+               return -EINVAL;
+@@ -400,6 +403,9 @@ static void ixgbevf_ipsec_del_sa(struct
+       adapter = netdev_priv(dev);
+       ipsec = adapter->ipsec;
+ 
++      if (!(adapter->pf_features & IXGBEVF_PF_SUP_IPSEC))
++              return;
++
+       if (xs->xso.dir == XFRM_DEV_OFFLOAD_IN) {
+               sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_RX_INDEX;
+ 
+@@ -628,6 +634,10 @@ void ixgbevf_init_ipsec_offload(struct i
+       size_t size;
+ 
+       switch (adapter->hw.api_version) {
++      case ixgbe_mbox_api_17:
++              if (!(adapter->pf_features & IXGBEVF_PF_SUP_IPSEC))
++                      return;
++              break;
+       case ixgbe_mbox_api_14:
+               break;
+       default:
+--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+@@ -366,6 +366,13 @@ struct ixgbevf_adapter {
+       /* Interrupt Throttle Rate */
+       u32 eitr_param;
+ 
++      u32 pf_features;
++#define IXGBEVF_PF_SUP_IPSEC          BIT(0)
++#define IXGBEVF_PF_SUP_ESX_MBX                BIT(1)
++
++#define IXGBEVF_SUPPORTED_FEATURES    (IXGBEVF_PF_SUP_IPSEC | \
++                                      IXGBEVF_PF_SUP_ESX_MBX)
++
+       struct ixgbevf_hw_stats stats;
+ 
+       unsigned long state;
+--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+@@ -2274,10 +2274,35 @@ static void ixgbevf_init_last_counter_st
+       adapter->stats.base_vfmprc = adapter->stats.last_vfmprc;
+ }
+ 
++/**
++ * ixgbevf_set_features - Set features supported by PF
++ * @adapter: pointer to the adapter struct
++ *
++ * Negotiate with PF supported features and then set pf_features accordingly.
++ */
++static void ixgbevf_set_features(struct ixgbevf_adapter *adapter)
++{
++      u32 *pf_features = &adapter->pf_features;
++      struct ixgbe_hw *hw = &adapter->hw;
++      int err;
++
++      err = hw->mac.ops.negotiate_features(hw, pf_features);
++      if (err && err != -EOPNOTSUPP)
++              netdev_dbg(adapter->netdev,
++                         "PF feature negotiation failed.\n");
++
++      /* Address also pre API 1.7 cases */
++      if (hw->api_version == ixgbe_mbox_api_14)
++              *pf_features |= IXGBEVF_PF_SUP_IPSEC;
++      else if (hw->api_version == ixgbe_mbox_api_15)
++              *pf_features |= IXGBEVF_PF_SUP_ESX_MBX;
++}
++
+ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter)
+ {
+       struct ixgbe_hw *hw = &adapter->hw;
+       static const int api[] = {
++              ixgbe_mbox_api_17,
+               ixgbe_mbox_api_16,
+               ixgbe_mbox_api_15,
+               ixgbe_mbox_api_14,
+@@ -2298,8 +2323,9 @@ static void ixgbevf_negotiate_api(struct
+               idx++;
+       }
+ 
+-      /* Following is not supported by API 1.6, it is specific for 1.5 */
+-      if (hw->api_version == ixgbe_mbox_api_15) {
++      ixgbevf_set_features(adapter);
++
++      if (adapter->pf_features & IXGBEVF_PF_SUP_ESX_MBX) {
+               hw->mbx.ops.init_params(hw);
+               memcpy(&hw->mbx.ops, &ixgbevf_mbx_ops,
+                      sizeof(struct ixgbe_mbx_operations));
+@@ -2657,6 +2683,7 @@ static void ixgbevf_set_num_queues(struc
+               case ixgbe_mbox_api_14:
+               case ixgbe_mbox_api_15:
+               case ixgbe_mbox_api_16:
++              case ixgbe_mbox_api_17:
+                       if (adapter->xdp_prog &&
+                           hw->mac.max_tx_queues == rss)
+                               rss = rss > 3 ? 2 : 1;
+@@ -4652,6 +4679,7 @@ static int ixgbevf_probe(struct pci_dev
+       case ixgbe_mbox_api_14:
+       case ixgbe_mbox_api_15:
+       case ixgbe_mbox_api_16:
++      case ixgbe_mbox_api_17:
+               netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE -
+                                 (ETH_HLEN + ETH_FCS_LEN);
+               break;
+--- a/drivers/net/ethernet/intel/ixgbevf/mbx.h
++++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h
+@@ -67,6 +67,7 @@ enum ixgbe_pfvf_api_rev {
+       ixgbe_mbox_api_14,      /* API version 1.4, linux/freebsd VF driver */
+       ixgbe_mbox_api_15,      /* API version 1.5, linux/freebsd VF driver */
+       ixgbe_mbox_api_16,      /* API version 1.6, linux/freebsd VF driver */
++      ixgbe_mbox_api_17,      /* API version 1.7, linux/freebsd VF driver */
+       /* This value should always be last */
+       ixgbe_mbox_api_unknown, /* indicates that API version is not known */
+ };
+@@ -106,6 +107,9 @@ enum ixgbe_pfvf_api_rev {
+ /* mailbox API, version 1.6 VF requests */
+ #define IXGBE_VF_GET_PF_LINK_STATE    0x11 /* request PF to send link info */
+ 
++/* mailbox API, version 1.7 VF requests */
++#define IXGBE_VF_FEATURES_NEGOTIATE   0x12 /* get features supported by PF*/
++
+ /* length of permanent address message returned from PF */
+ #define IXGBE_VF_PERMADDR_MSG_LEN     4
+ /* word in permanent address message with the current multicast type */
+--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
++++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
+@@ -313,6 +313,7 @@ int ixgbevf_get_reta_locked(struct ixgbe
+        * is not supported for this device type.
+        */
+       switch (hw->api_version) {
++      case ixgbe_mbox_api_17:
+       case ixgbe_mbox_api_16:
+       case ixgbe_mbox_api_15:
+       case ixgbe_mbox_api_14:
+@@ -383,6 +384,7 @@ int ixgbevf_get_rss_key_locked(struct ix
+        * or if the operation is not supported for this device type.
+        */
+       switch (hw->api_version) {
++      case ixgbe_mbox_api_17:
+       case ixgbe_mbox_api_16:
+       case ixgbe_mbox_api_15:
+       case ixgbe_mbox_api_14:
+@@ -555,6 +557,7 @@ static s32 ixgbevf_update_xcast_mode(str
+       case ixgbe_mbox_api_14:
+       case ixgbe_mbox_api_15:
+       case ixgbe_mbox_api_16:
++      case ixgbe_mbox_api_17:
+               break;
+       default:
+               return -EOPNOTSUPP;
+@@ -646,6 +649,7 @@ static int ixgbevf_get_pf_link_state(str
+ 
+       switch (hw->api_version) {
+       case ixgbe_mbox_api_16:
++      case ixgbe_mbox_api_17:
+               break;
+       default:
+               return -EOPNOTSUPP;
+@@ -670,6 +674,42 @@ static int ixgbevf_get_pf_link_state(str
+ }
+ 
+ /**
++ * ixgbevf_negotiate_features_vf - negotiate supported features with PF driver
++ * @hw: pointer to the HW structure
++ * @pf_features: bitmask of features supported by PF
++ *
++ * Return: IXGBE_ERR_MBX in the  case of mailbox error,
++ * -EOPNOTSUPP if the op is not supported or 0 on success.
++ */
++static int ixgbevf_negotiate_features_vf(struct ixgbe_hw *hw, u32 *pf_features)
++{
++      u32 msgbuf[2] = {};
++      int err;
++
++      switch (hw->api_version) {
++      case ixgbe_mbox_api_17:
++              break;
++      default:
++              return -EOPNOTSUPP;
++      }
++
++      msgbuf[0] = IXGBE_VF_FEATURES_NEGOTIATE;
++      msgbuf[1] = IXGBEVF_SUPPORTED_FEATURES;
++
++      err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
++                                       ARRAY_SIZE(msgbuf));
++
++      if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_FAILURE)) {
++              err = IXGBE_ERR_MBX;
++              *pf_features = 0x0;
++      } else {
++              *pf_features = msgbuf[1];
++      }
++
++      return err;
++}
++
++/**
+  *  ixgbevf_set_vfta_vf - Set/Unset VLAN filter table address
+  *  @hw: pointer to the HW structure
+  *  @vlan: 12 bit VLAN ID
+@@ -799,6 +839,7 @@ static s32 ixgbevf_check_mac_link_vf(str
+                                    bool *link_up,
+                                    bool autoneg_wait_to_complete)
+ {
++      struct ixgbevf_adapter *adapter = hw->back;
+       struct ixgbe_mbx_info *mbx = &hw->mbx;
+       struct ixgbe_mac_info *mac = &hw->mac;
+       s32 ret_val = 0;
+@@ -825,7 +866,7 @@ static s32 ixgbevf_check_mac_link_vf(str
+        * until we are called again and don't report an error
+        */
+       if (mbx->ops.read(hw, &in_msg, 1)) {
+-              if (hw->api_version >= ixgbe_mbox_api_15)
++              if (adapter->pf_features & IXGBEVF_PF_SUP_ESX_MBX)
+                       mac->get_link_status = false;
+               goto out;
+       }
+@@ -1026,6 +1067,7 @@ int ixgbevf_get_queues(struct ixgbe_hw *
+       case ixgbe_mbox_api_14:
+       case ixgbe_mbox_api_15:
+       case ixgbe_mbox_api_16:
++      case ixgbe_mbox_api_17:
+               break;
+       default:
+               return 0;
+@@ -1080,6 +1122,7 @@ static const struct ixgbe_mac_operations
+       .setup_link             = ixgbevf_setup_mac_link_vf,
+       .check_link             = ixgbevf_check_mac_link_vf,
+       .negotiate_api_version  = ixgbevf_negotiate_api_version_vf,
++      .negotiate_features     = ixgbevf_negotiate_features_vf,
+       .set_rar                = ixgbevf_set_rar_vf,
+       .update_mc_addr_list    = ixgbevf_update_mc_addr_list_vf,
+       .update_xcast_mode      = ixgbevf_update_xcast_mode,
+--- a/drivers/net/ethernet/intel/ixgbevf/vf.h
++++ b/drivers/net/ethernet/intel/ixgbevf/vf.h
+@@ -26,6 +26,7 @@ struct ixgbe_mac_operations {
+       s32 (*stop_adapter)(struct ixgbe_hw *);
+       s32 (*get_bus_info)(struct ixgbe_hw *);
+       s32 (*negotiate_api_version)(struct ixgbe_hw *hw, int api);
++      int (*negotiate_features)(struct ixgbe_hw *hw, u32 *pf_features);
+ 
+       /* Link */
+       s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool, bool);
diff --git a/queue-6.12/md-fix-mssing-blktrace-bio-split-events.patch b/queue-6.12/md-fix-mssing-blktrace-bio-split-events.patch

new file mode 100644 (file)

index 0000000..61d0e4f
--- /dev/null
+++ b/queue-6.12/md-fix-mssing-blktrace-bio-split-events.patch
@@ -0,0 +1,142 @@
+From stable+bounces-188107-greg=kroah.com@vger.kernel.org Mon Oct 20 15:07:08 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 09:06:49 -0400
+Subject: md: fix mssing blktrace bio split events
+To: stable@vger.kernel.org
+Cc: Yu Kuai <yukuai3@huawei.com>, Damien Le Moal <dlemoal@kernel.org>, Christoph Hellwig <hch@lst.de>, Jens Axboe <axboe@kernel.dk>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020130649.1765603-4-sashal@kernel.org>
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 22f166218f7313e8fe2d19213b5f4b3265f8c39e ]
+
+If a bio is split by internal handling like chunksize or badblocks, the
+corresponding trace_block_split() is missing, leaving blktrace unable
+to catch BIO split events and making it harder to analyze the
+BIO sequence.
+
+Cc: stable@vger.kernel.org
+Fixes: 4b1faf931650 ("block: Kill bio_pair_split()")
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/md-linear.c |    1 +
+ drivers/md/raid0.c     |    4 ++++
+ drivers/md/raid1.c     |    4 ++++
+ drivers/md/raid10.c    |    8 ++++++++
+ drivers/md/raid5.c     |    2 ++
+ 5 files changed, 19 insertions(+)
+
+--- a/drivers/md/md-linear.c
++++ b/drivers/md/md-linear.c
+@@ -267,6 +267,7 @@ static bool linear_make_request(struct m
+               }
+ 
+               bio_chain(split, bio);
++              trace_block_split(split, bio->bi_iter.bi_sector);
+               submit_bio_noacct(bio);
+               bio = split;
+       }
+--- a/drivers/md/raid0.c
++++ b/drivers/md/raid0.c
+@@ -470,7 +470,9 @@ static void raid0_handle_discard(struct
+                       bio_endio(bio);
+                       return;
+               }
++
+               bio_chain(split, bio);
++              trace_block_split(split, bio->bi_iter.bi_sector);
+               submit_bio_noacct(bio);
+               bio = split;
+               end = zone->zone_end;
+@@ -618,7 +620,9 @@ static bool raid0_make_request(struct md
+                       bio_endio(bio);
+                       return true;
+               }
++
+               bio_chain(split, bio);
++              trace_block_split(split, bio->bi_iter.bi_sector);
+               raid0_map_submit_bio(mddev, bio);
+               bio = split;
+       }
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -1383,7 +1383,9 @@ static void raid1_read_request(struct md
+                       error = PTR_ERR(split);
+                       goto err_handle;
+               }
++
+               bio_chain(split, bio);
++              trace_block_split(split, bio->bi_iter.bi_sector);
+               submit_bio_noacct(bio);
+               bio = split;
+               r1_bio->master_bio = bio;
+@@ -1574,7 +1576,9 @@ static void raid1_write_request(struct m
+                       error = PTR_ERR(split);
+                       goto err_handle;
+               }
++
+               bio_chain(split, bio);
++              trace_block_split(split, bio->bi_iter.bi_sector);
+               submit_bio_noacct(bio);
+               bio = split;
+               r1_bio->master_bio = bio;
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -1208,7 +1208,9 @@ static void raid10_read_request(struct m
+                       error = PTR_ERR(split);
+                       goto err_handle;
+               }
++
+               bio_chain(split, bio);
++              trace_block_split(split, bio->bi_iter.bi_sector);
+               allow_barrier(conf);
+               submit_bio_noacct(bio);
+               wait_barrier(conf, false);
+@@ -1484,7 +1486,9 @@ static void raid10_write_request(struct
+                       error = PTR_ERR(split);
+                       goto err_handle;
+               }
++
+               bio_chain(split, bio);
++              trace_block_split(split, bio->bi_iter.bi_sector);
+               allow_barrier(conf);
+               submit_bio_noacct(bio);
+               wait_barrier(conf, false);
+@@ -1669,7 +1673,9 @@ static int raid10_handle_discard(struct
+                       bio_endio(bio);
+                       return 0;
+               }
++
+               bio_chain(split, bio);
++              trace_block_split(split, bio->bi_iter.bi_sector);
+               allow_barrier(conf);
+               /* Resend the fist split part */
+               submit_bio_noacct(split);
+@@ -1684,7 +1690,9 @@ static int raid10_handle_discard(struct
+                       bio_endio(bio);
+                       return 0;
+               }
++
+               bio_chain(split, bio);
++              trace_block_split(split, bio->bi_iter.bi_sector);
+               allow_barrier(conf);
+               /* Resend the second split part */
+               submit_bio_noacct(bio);
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -5484,8 +5484,10 @@ static struct bio *chunk_aligned_read(st
+ 
+       if (sectors < bio_sectors(raid_bio)) {
+               struct r5conf *conf = mddev->private;
++
+               split = bio_split(raid_bio, sectors, GFP_NOIO, &conf->bio_split);
+               bio_chain(split, raid_bio);
++              trace_block_split(split, raid_bio->bi_iter.bi_sector);
+               submit_bio_noacct(raid_bio);
+               raid_bio = split;
+       }
diff --git a/queue-6.12/md-raid0-handle-bio_split-errors.patch b/queue-6.12/md-raid0-handle-bio_split-errors.patch

new file mode 100644 (file)

index 0000000..de3a5a2
--- /dev/null
+++ b/queue-6.12/md-raid0-handle-bio_split-errors.patch
@@ -0,0 +1,55 @@
+From stable+bounces-188104-greg=kroah.com@vger.kernel.org Mon Oct 20 15:07:00 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 09:06:46 -0400
+Subject: md/raid0: Handle bio_split() errors
+To: stable@vger.kernel.org
+Cc: John Garry <john.g.garry@oracle.com>, Yu Kuai <yukuai3@huawei.com>, Hannes Reinecke <hare@suse.de>, Jens Axboe <axboe@kernel.dk>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020130649.1765603-1-sashal@kernel.org>
+
+From: John Garry <john.g.garry@oracle.com>
+
+[ Upstream commit 74538fdac3e85aae55eb4ed786478ed2384cb85d ]
+
+Add proper bio_split() error handling. For any error, set bi_status, end
+the bio, and return.
+
+Reviewed-by: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Signed-off-by: John Garry <john.g.garry@oracle.com>
+Link: https://lore.kernel.org/r/20241111112150.3756529-5-john.g.garry@oracle.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: 22f166218f73 ("md: fix mssing blktrace bio split events")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/raid0.c |   12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/drivers/md/raid0.c
++++ b/drivers/md/raid0.c
+@@ -464,6 +464,12 @@ static void raid0_handle_discard(struct
+               struct bio *split = bio_split(bio,
+                       zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
+                       &mddev->bio_set);
++
++              if (IS_ERR(split)) {
++                      bio->bi_status = errno_to_blk_status(PTR_ERR(split));
++                      bio_endio(bio);
++                      return;
++              }
+               bio_chain(split, bio);
+               submit_bio_noacct(bio);
+               bio = split;
+@@ -606,6 +612,12 @@ static bool raid0_make_request(struct md
+       if (sectors < bio_sectors(bio)) {
+               struct bio *split = bio_split(bio, sectors, GFP_NOIO,
+                                             &mddev->bio_set);
++
++              if (IS_ERR(split)) {
++                      bio->bi_status = errno_to_blk_status(PTR_ERR(split));
++                      bio_endio(bio);
++                      return true;
++              }
+               bio_chain(split, bio);
+               raid0_map_submit_bio(mddev, bio);
+               bio = split;
diff --git a/queue-6.12/md-raid1-handle-bio_split-errors.patch b/queue-6.12/md-raid1-handle-bio_split-errors.patch

new file mode 100644 (file)

index 0000000..ee2baa1
--- /dev/null
+++ b/queue-6.12/md-raid1-handle-bio_split-errors.patch
@@ -0,0 +1,110 @@
+From stable+bounces-188105-greg=kroah.com@vger.kernel.org Mon Oct 20 15:08:36 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 09:06:47 -0400
+Subject: md/raid1: Handle bio_split() errors
+To: stable@vger.kernel.org
+Cc: John Garry <john.g.garry@oracle.com>, Yu Kuai <yukuai3@huawei.com>, Hannes Reinecke <hare@suse.de>, Jens Axboe <axboe@kernel.dk>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020130649.1765603-2-sashal@kernel.org>
+
+From: John Garry <john.g.garry@oracle.com>
+
+[ Upstream commit b1a7ad8b5c4fa28325ee7b369a2d545d3e16ccde ]
+
+Add proper bio_split() error handling. For any error, call
+raid_end_bio_io() and return.
+
+For the case of an error in the write path, we need to undo the increment
+in the rdev pending count and NULLify the r1_bio->bios[] pointers.
+
+For read path failure, we need to undo rdev pending count increment from
+the earlier read_balance() call.
+
+Reviewed-by: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Signed-off-by: John Garry <john.g.garry@oracle.com>
+Link: https://lore.kernel.org/r/20241111112150.3756529-6-john.g.garry@oracle.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: 22f166218f73 ("md: fix mssing blktrace bio split events")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/raid1.c |   33 +++++++++++++++++++++++++++++++--
+ 1 file changed, 31 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -1317,7 +1317,7 @@ static void raid1_read_request(struct md
+       struct raid1_info *mirror;
+       struct bio *read_bio;
+       int max_sectors;
+-      int rdisk;
++      int rdisk, error;
+       bool r1bio_existed = !!r1_bio;
+ 
+       /*
+@@ -1378,6 +1378,11 @@ static void raid1_read_request(struct md
+       if (max_sectors < bio_sectors(bio)) {
+               struct bio *split = bio_split(bio, max_sectors,
+                                             gfp, &conf->bio_split);
++
++              if (IS_ERR(split)) {
++                      error = PTR_ERR(split);
++                      goto err_handle;
++              }
+               bio_chain(split, bio);
+               submit_bio_noacct(bio);
+               bio = split;
+@@ -1404,6 +1409,13 @@ static void raid1_read_request(struct md
+       read_bio->bi_private = r1_bio;
+       mddev_trace_remap(mddev, read_bio, r1_bio->sector);
+       submit_bio_noacct(read_bio);
++      return;
++
++err_handle:
++      atomic_dec(&mirror->rdev->nr_pending);
++      bio->bi_status = errno_to_blk_status(error);
++      set_bit(R1BIO_Uptodate, &r1_bio->state);
++      raid_end_bio_io(r1_bio);
+ }
+ 
+ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
+@@ -1411,7 +1423,7 @@ static void raid1_write_request(struct m
+ {
+       struct r1conf *conf = mddev->private;
+       struct r1bio *r1_bio;
+-      int i, disks;
++      int i, disks, k, error;
+       unsigned long flags;
+       struct md_rdev *blocked_rdev;
+       int first_clone;
+@@ -1557,6 +1569,11 @@ static void raid1_write_request(struct m
+       if (max_sectors < bio_sectors(bio)) {
+               struct bio *split = bio_split(bio, max_sectors,
+                                             GFP_NOIO, &conf->bio_split);
++
++              if (IS_ERR(split)) {
++                      error = PTR_ERR(split);
++                      goto err_handle;
++              }
+               bio_chain(split, bio);
+               submit_bio_noacct(bio);
+               bio = split;
+@@ -1640,6 +1657,18 @@ static void raid1_write_request(struct m
+ 
+       /* In case raid1d snuck in to freeze_array */
+       wake_up_barrier(conf);
++      return;
++err_handle:
++      for (k = 0; k < i; k++) {
++              if (r1_bio->bios[k]) {
++                      rdev_dec_pending(conf->mirrors[k].rdev, mddev);
++                      r1_bio->bios[k] = NULL;
++              }
++      }
++
++      bio->bi_status = errno_to_blk_status(error);
++      set_bit(R1BIO_Uptodate, &r1_bio->state);
++      raid_end_bio_io(r1_bio);
+ }
+ 
+ static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
diff --git a/queue-6.12/md-raid10-handle-bio_split-errors.patch b/queue-6.12/md-raid10-handle-bio_split-errors.patch

new file mode 100644 (file)

index 0000000..769d8d2
--- /dev/null
+++ b/queue-6.12/md-raid10-handle-bio_split-errors.patch
@@ -0,0 +1,135 @@
+From stable+bounces-188106-greg=kroah.com@vger.kernel.org Mon Oct 20 15:07:02 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 09:06:48 -0400
+Subject: md/raid10: Handle bio_split() errors
+To: stable@vger.kernel.org
+Cc: John Garry <john.g.garry@oracle.com>, Yu Kuai <yukuai3@huawei.com>, Hannes Reinecke <hare@suse.de>, Jens Axboe <axboe@kernel.dk>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020130649.1765603-3-sashal@kernel.org>
+
+From: John Garry <john.g.garry@oracle.com>
+
+[ Upstream commit 4cf58d9529097328b669e3c8693ed21e3a041903 ]
+
+Add proper bio_split() error handling. For any error, call
+raid_end_bio_io() and return. Except for discard, where we end the bio
+directly.
+
+Reviewed-by: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Signed-off-by: John Garry <john.g.garry@oracle.com>
+Link: https://lore.kernel.org/r/20241111112150.3756529-7-john.g.garry@oracle.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: 22f166218f73 ("md: fix mssing blktrace bio split events")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/raid10.c |   47 ++++++++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 46 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -1153,6 +1153,7 @@ static void raid10_read_request(struct m
+       int slot = r10_bio->read_slot;
+       struct md_rdev *err_rdev = NULL;
+       gfp_t gfp = GFP_NOIO;
++      int error;
+ 
+       if (slot >= 0 && r10_bio->devs[slot].rdev) {
+               /*
+@@ -1203,6 +1204,10 @@ static void raid10_read_request(struct m
+       if (max_sectors < bio_sectors(bio)) {
+               struct bio *split = bio_split(bio, max_sectors,
+                                             gfp, &conf->bio_split);
++              if (IS_ERR(split)) {
++                      error = PTR_ERR(split);
++                      goto err_handle;
++              }
+               bio_chain(split, bio);
+               allow_barrier(conf);
+               submit_bio_noacct(bio);
+@@ -1233,6 +1238,11 @@ static void raid10_read_request(struct m
+       mddev_trace_remap(mddev, read_bio, r10_bio->sector);
+       submit_bio_noacct(read_bio);
+       return;
++err_handle:
++      atomic_dec(&rdev->nr_pending);
++      bio->bi_status = errno_to_blk_status(error);
++      set_bit(R10BIO_Uptodate, &r10_bio->state);
++      raid_end_bio_io(r10_bio);
+ }
+ 
+ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
+@@ -1341,9 +1351,10 @@ static void raid10_write_request(struct
+                                struct r10bio *r10_bio)
+ {
+       struct r10conf *conf = mddev->private;
+-      int i;
++      int i, k;
+       sector_t sectors;
+       int max_sectors;
++      int error;
+ 
+       if ((mddev_is_clustered(mddev) &&
+            md_cluster_ops->area_resyncing(mddev, WRITE,
+@@ -1469,6 +1480,10 @@ static void raid10_write_request(struct
+       if (r10_bio->sectors < bio_sectors(bio)) {
+               struct bio *split = bio_split(bio, r10_bio->sectors,
+                                             GFP_NOIO, &conf->bio_split);
++              if (IS_ERR(split)) {
++                      error = PTR_ERR(split);
++                      goto err_handle;
++              }
+               bio_chain(split, bio);
+               allow_barrier(conf);
+               submit_bio_noacct(bio);
+@@ -1488,6 +1503,26 @@ static void raid10_write_request(struct
+                       raid10_write_one_disk(mddev, r10_bio, bio, true, i);
+       }
+       one_write_done(r10_bio);
++      return;
++err_handle:
++      for (k = 0;  k < i; k++) {
++              int d = r10_bio->devs[k].devnum;
++              struct md_rdev *rdev = conf->mirrors[d].rdev;
++              struct md_rdev *rrdev = conf->mirrors[d].replacement;
++
++              if (r10_bio->devs[k].bio) {
++                      rdev_dec_pending(rdev, mddev);
++                      r10_bio->devs[k].bio = NULL;
++              }
++              if (r10_bio->devs[k].repl_bio) {
++                      rdev_dec_pending(rrdev, mddev);
++                      r10_bio->devs[k].repl_bio = NULL;
++              }
++      }
++
++      bio->bi_status = errno_to_blk_status(error);
++      set_bit(R10BIO_Uptodate, &r10_bio->state);
++      raid_end_bio_io(r10_bio);
+ }
+ 
+ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
+@@ -1629,6 +1664,11 @@ static int raid10_handle_discard(struct
+       if (remainder) {
+               split_size = stripe_size - remainder;
+               split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
++              if (IS_ERR(split)) {
++                      bio->bi_status = errno_to_blk_status(PTR_ERR(split));
++                      bio_endio(bio);
++                      return 0;
++              }
+               bio_chain(split, bio);
+               allow_barrier(conf);
+               /* Resend the fist split part */
+@@ -1639,6 +1679,11 @@ static int raid10_handle_discard(struct
+       if (remainder) {
+               split_size = bio_sectors(bio) - remainder;
+               split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
++              if (IS_ERR(split)) {
++                      bio->bi_status = errno_to_blk_status(PTR_ERR(split));
++                      bio_endio(bio);
++                      return 0;
++              }
+               bio_chain(split, bio);
+               allow_barrier(conf);
+               /* Resend the second split part */
diff --git a/queue-6.12/mptcp-call-dst_release-in-mptcp_active_enable.patch b/queue-6.12/mptcp-call-dst_release-in-mptcp_active_enable.patch

new file mode 100644 (file)

index 0000000..323e501
--- /dev/null
+++ b/queue-6.12/mptcp-call-dst_release-in-mptcp_active_enable.patch
@@ -0,0 +1,42 @@
+From stable+bounces-188152-greg=kroah.com@vger.kernel.org Mon Oct 20 17:44:25 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 11:44:07 -0400
+Subject: mptcp: Call dst_release() in mptcp_active_enable().
+To: stable@vger.kernel.org
+Cc: Kuniyuki Iwashima <kuniyu@google.com>, "Matthieu Baerts (NGI0)" <matttbe@kernel.org>, Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020154409.1823664-6-sashal@kernel.org>
+
+From: Kuniyuki Iwashima <kuniyu@google.com>
+
+[ Upstream commit 108a86c71c93ff28087994e6107bc99ebe336629 ]
+
+mptcp_active_enable() calls sk_dst_get(), which returns dst with its
+refcount bumped, but forgot dst_release().
+
+Let's add missing dst_release().
+
+Cc: stable@vger.kernel.org
+Fixes: 27069e7cb3d1 ("mptcp: disable active MPTCP in case of blackhole")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20250916214758.650211-7-kuniyu@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 833d4313bc1e ("mptcp: reset blackhole on success with non-loopback ifaces")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/ctrl.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/mptcp/ctrl.c
++++ b/net/mptcp/ctrl.c
+@@ -385,6 +385,8 @@ void mptcp_active_enable(struct sock *sk
+ 
+               if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))
+                       atomic_set(&pernet->active_disable_times, 0);
++
++              dst_release(dst);
+       }
+ }
+ 
diff --git a/queue-6.12/mptcp-reset-blackhole-on-success-with-non-loopback-ifaces.patch b/queue-6.12/mptcp-reset-blackhole-on-success-with-non-loopback-ifaces.patch

new file mode 100644 (file)

index 0000000..4f81f20
--- /dev/null
+++ b/queue-6.12/mptcp-reset-blackhole-on-success-with-non-loopback-ifaces.patch
@@ -0,0 +1,52 @@
+From stable+bounces-188154-greg=kroah.com@vger.kernel.org Mon Oct 20 17:44:31 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 11:44:09 -0400
+Subject: mptcp: reset blackhole on success with non-loopback ifaces
+To: stable@vger.kernel.org
+Cc: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>, Simon Horman <horms@kernel.org>, Kuniyuki Iwashima <kuniyu@google.com>, Jakub Kicinski <kuba@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020154409.1823664-8-sashal@kernel.org>
+
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+
+[ Upstream commit 833d4313bc1e9e194814917d23e8874d6b651649 ]
+
+When a first MPTCP connection gets successfully established after a
+blackhole period, 'active_disable_times' was supposed to be reset when
+this connection was done via any non-loopback interfaces.
+
+Unfortunately, the opposite condition was checked: only reset when the
+connection was established via a loopback interface. Fixing this by
+simply looking at the opposite.
+
+This is similar to what is done with TCP FastOpen, see
+tcp_fastopen_active_disable_ofo_check().
+
+This patch is a follow-up of a previous discussion linked to commit
+893c49a78d9f ("mptcp: Use __sk_dst_get() and dst_dev_rcu() in
+mptcp_active_enable()."), see [1].
+
+Fixes: 27069e7cb3d1 ("mptcp: disable active MPTCP in case of blackhole")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/4209a283-8822-47bd-95b7-87e96d9b7ea3@kernel.org [1]
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
+Link: https://patch.msgid.link/20250918-net-next-mptcp-blackhole-reset-loopback-v1-1-bf5818326639@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/ctrl.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/mptcp/ctrl.c
++++ b/net/mptcp/ctrl.c
+@@ -387,7 +387,7 @@ void mptcp_active_enable(struct sock *sk
+               rcu_read_lock();
+               dst = __sk_dst_get(sk);
+               dev = dst ? dst_dev_rcu(dst) : NULL;
+-              if (dev && (dev->flags & IFF_LOOPBACK))
++              if (!(dev && (dev->flags & IFF_LOOPBACK)))
+                       atomic_set(&pernet->active_disable_times, 0);
+               rcu_read_unlock();
+       }
diff --git a/queue-6.12/mptcp-use-__sk_dst_get-and-dst_dev_rcu-in-mptcp_active_enable.patch b/queue-6.12/mptcp-use-__sk_dst_get-and-dst_dev_rcu-in-mptcp_active_enable.patch

new file mode 100644 (file)

index 0000000..6218c7c
--- /dev/null
+++ b/queue-6.12/mptcp-use-__sk_dst_get-and-dst_dev_rcu-in-mptcp_active_enable.patch
@@ -0,0 +1,55 @@
+From stable+bounces-188153-greg=kroah.com@vger.kernel.org Mon Oct 20 17:47:15 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 11:44:08 -0400
+Subject: mptcp: Use __sk_dst_get() and dst_dev_rcu() in mptcp_active_enable().
+To: stable@vger.kernel.org
+Cc: Kuniyuki Iwashima <kuniyu@google.com>, "Matthieu Baerts (NGI0)" <matttbe@kernel.org>, Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020154409.1823664-7-sashal@kernel.org>
+
+From: Kuniyuki Iwashima <kuniyu@google.com>
+
+[ Upstream commit 893c49a78d9f85e4b8081b908fb7c407d018106a ]
+
+mptcp_active_enable() is called from subflow_finish_connect(),
+which is icsk->icsk_af_ops->sk_rx_dst_set() and it's not always
+under RCU.
+
+Using sk_dst_get(sk)->dev could trigger UAF.
+
+Let's use __sk_dst_get() and dst_dev_rcu().
+
+Fixes: 27069e7cb3d1 ("mptcp: disable active MPTCP in case of blackhole")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20250916214758.650211-8-kuniyu@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 833d4313bc1e ("mptcp: reset blackhole on success with non-loopback ifaces")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/ctrl.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/net/mptcp/ctrl.c
++++ b/net/mptcp/ctrl.c
+@@ -381,12 +381,15 @@ void mptcp_active_enable(struct sock *sk
+       struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk));
+ 
+       if (atomic_read(&pernet->active_disable_times)) {
+-              struct dst_entry *dst = sk_dst_get(sk);
++              struct net_device *dev;
++              struct dst_entry *dst;
+ 
+-              if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))
++              rcu_read_lock();
++              dst = __sk_dst_get(sk);
++              dev = dst ? dst_dev_rcu(dst) : NULL;
++              if (dev && (dev->flags & IFF_LOOPBACK))
+                       atomic_set(&pernet->active_disable_times, 0);
+-
+-              dst_release(dst);
++              rcu_read_unlock();
+       }
+ }
+ 
diff --git a/queue-6.12/net-add-locking-to-protect-skb-dev-access-in-ip_output.patch b/queue-6.12/net-add-locking-to-protect-skb-dev-access-in-ip_output.patch

new file mode 100644 (file)

index 0000000..336606f
--- /dev/null
+++ b/queue-6.12/net-add-locking-to-protect-skb-dev-access-in-ip_output.patch
@@ -0,0 +1,121 @@
+From stable+bounces-188151-greg=kroah.com@vger.kernel.org Mon Oct 20 17:44:27 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 11:44:06 -0400
+Subject: net: Add locking to protect skb->dev access in ip_output
+To: stable@vger.kernel.org
+Cc: Sharath Chandra Vurukala <quic_sharathv@quicinc.com>, Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020154409.1823664-5-sashal@kernel.org>
+
+From: Sharath Chandra Vurukala <quic_sharathv@quicinc.com>
+
+[ Upstream commit 1dbf1d590d10a6d1978e8184f8dfe20af22d680a ]
+
+In ip_output(), skb->dev is updated from skb_dst(skb)->dev, which
+can become invalid when the interface is unregistered and freed.
+
+Introduce a new skb_dst_dev_rcu() function to be used instead of
+skb_dst_dev() under rcu_read_lock() in ip_output(). This ensures that
+all the skbs associated with the dev being deregistered are
+transmitted first, before the dev is freed.
+
+Given that ip_output() is called within an rcu_read_lock()
+critical section or from a bottom-half context, it is safe to introduce
+an RCU read-side critical section within it.
+
+Multiple panic call stacks were observed when UL traffic was run
+in concurrency with device deregistration from different functions,
+pasting one sample for reference.
+
+[496733.627565][T13385] Call trace:
+[496733.627570][T13385] bpf_prog_ce7c9180c3b128ea_cgroupskb_egres+0x24c/0x7f0
+[496733.627581][T13385] __cgroup_bpf_run_filter_skb+0x128/0x498
+[496733.627595][T13385] ip_finish_output+0xa4/0xf4
+[496733.627605][T13385] ip_output+0x100/0x1a0
+[496733.627613][T13385] ip_send_skb+0x68/0x100
+[496733.627618][T13385] udp_send_skb+0x1c4/0x384
+[496733.627625][T13385] udp_sendmsg+0x7b0/0x898
+[496733.627631][T13385] inet_sendmsg+0x5c/0x7c
+[496733.627639][T13385] __sys_sendto+0x174/0x1e4
+[496733.627647][T13385] __arm64_sys_sendto+0x28/0x3c
+[496733.627653][T13385] invoke_syscall+0x58/0x11c
+[496733.627662][T13385] el0_svc_common+0x88/0xf4
+[496733.627669][T13385] do_el0_svc+0x2c/0xb0
+[496733.627676][T13385] el0_svc+0x2c/0xa4
+[496733.627683][T13385] el0t_64_sync_handler+0x68/0xb4
+[496733.627689][T13385] el0t_64_sync+0x1a4/0x1a8
+
+Changes in v3:
+- Replaced WARN_ON() with  WARN_ON_ONCE(), as suggested by Willem de Bruijn.
+- Dropped legacy lines mistakenly pulled in from an outdated branch.
+
+Changes in v2:
+- Addressed review comments from Eric Dumazet
+- Used READ_ONCE() to prevent potential load/store tearing
+- Added skb_dst_dev_rcu() and used along with rcu_read_lock() in ip_output
+
+Signed-off-by: Sharath Chandra Vurukala <quic_sharathv@quicinc.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20250730105118.GA26100@hu-sharathv-hyd.qualcomm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 833d4313bc1e ("mptcp: reset blackhole on success with non-loopback ifaces")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/dst.h    |   12 ++++++++++++
+ net/ipv4/ip_output.c |   15 ++++++++++-----
+ 2 files changed, 22 insertions(+), 5 deletions(-)
+
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -566,11 +566,23 @@ static inline struct net_device *dst_dev
+       return READ_ONCE(dst->dev);
+ }
+ 
++static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst)
++{
++      /* In the future, use rcu_dereference(dst->dev) */
++      WARN_ON_ONCE(!rcu_read_lock_held());
++      return READ_ONCE(dst->dev);
++}
++
+ static inline struct net_device *skb_dst_dev(const struct sk_buff *skb)
+ {
+       return dst_dev(skb_dst(skb));
+ }
+ 
++static inline struct net_device *skb_dst_dev_rcu(const struct sk_buff *skb)
++{
++      return dst_dev_rcu(skb_dst(skb));
++}
++
+ static inline struct net *skb_dst_dev_net(const struct sk_buff *skb)
+ {
+       return dev_net(skb_dst_dev(skb));
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -426,15 +426,20 @@ int ip_mc_output(struct net *net, struct
+ 
+ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
+-      struct net_device *dev = skb_dst_dev(skb), *indev = skb->dev;
++      struct net_device *dev, *indev = skb->dev;
++      int ret_val;
+ 
++      rcu_read_lock();
++      dev = skb_dst_dev_rcu(skb);
+       skb->dev = dev;
+       skb->protocol = htons(ETH_P_IP);
+ 
+-      return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+-                          net, sk, skb, indev, dev,
+-                          ip_finish_output,
+-                          !(IPCB(skb)->flags & IPSKB_REROUTED));
++      ret_val = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
++                              net, sk, skb, indev, dev,
++                              ip_finish_output,
++                              !(IPCB(skb)->flags & IPSKB_REROUTED));
++      rcu_read_unlock();
++      return ret_val;
+ }
+ EXPORT_SYMBOL(ip_output);
+ 
diff --git a/queue-6.12/net-dst-add-four-helpers-to-annotate-data-races-around-dst-dev.patch b/queue-6.12/net-dst-add-four-helpers-to-annotate-data-races-around-dst-dev.patch

new file mode 100644 (file)

index 0000000..cf5160f
--- /dev/null
+++ b/queue-6.12/net-dst-add-four-helpers-to-annotate-data-races-around-dst-dev.patch
@@ -0,0 +1,135 @@
+From stable+bounces-188149-greg=kroah.com@vger.kernel.org Mon Oct 20 17:47:03 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 11:44:04 -0400
+Subject: net: dst: add four helpers to annotate data-races around dst->dev
+To: stable@vger.kernel.org
+Cc: Eric Dumazet <edumazet@google.com>, Kuniyuki Iwashima <kuniyu@google.com>, Jakub Kicinski <kuba@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020154409.1823664-3-sashal@kernel.org>
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 88fe14253e181878c2ddb51a298ae8c468a63010 ]
+
+dst->dev is read locklessly in many contexts,
+and written in dst_dev_put().
+
+Fixing all the races is going to need many changes.
+
+We probably will have to add full RCU protection.
+
+Add four helpers to ease this painful process.
+
+static inline struct net_device *dst_dev(const struct dst_entry *dst)
+{
+       return READ_ONCE(dst->dev);
+}
+
+static inline struct net_device *skb_dst_dev(const struct sk_buff *skb)
+{
+       return dst_dev(skb_dst(skb));
+}
+
+static inline struct net *skb_dst_dev_net(const struct sk_buff *skb)
+{
+       return dev_net(skb_dst_dev(skb));
+}
+
+static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb)
+{
+       return dev_net_rcu(skb_dst_dev(skb));
+}
+
+Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
+Link: https://patch.msgid.link/20250630121934.3399505-7-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 833d4313bc1e ("mptcp: reset blackhole on success with non-loopback ifaces")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/dst.h |   20 ++++++++++++++++++++
+ net/core/dst.c    |    4 ++--
+ net/core/sock.c   |    8 ++++----
+ 3 files changed, 26 insertions(+), 6 deletions(-)
+
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -561,6 +561,26 @@ static inline void skb_dst_update_pmtu_n
+               dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
+ }
+ 
++static inline struct net_device *dst_dev(const struct dst_entry *dst)
++{
++      return READ_ONCE(dst->dev);
++}
++
++static inline struct net_device *skb_dst_dev(const struct sk_buff *skb)
++{
++      return dst_dev(skb_dst(skb));
++}
++
++static inline struct net *skb_dst_dev_net(const struct sk_buff *skb)
++{
++      return dev_net(skb_dst_dev(skb));
++}
++
++static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb)
++{
++      return dev_net_rcu(skb_dst_dev(skb));
++}
++
+ struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie);
+ void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+                              struct sk_buff *skb, u32 mtu, bool confirm_neigh);
+--- a/net/core/dst.c
++++ b/net/core/dst.c
+@@ -150,7 +150,7 @@ void dst_dev_put(struct dst_entry *dst)
+               dst->ops->ifdown(dst, dev);
+       WRITE_ONCE(dst->input, dst_discard);
+       WRITE_ONCE(dst->output, dst_discard_out);
+-      dst->dev = blackhole_netdev;
++      WRITE_ONCE(dst->dev, blackhole_netdev);
+       netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker,
+                          GFP_ATOMIC);
+ }
+@@ -263,7 +263,7 @@ unsigned int dst_blackhole_mtu(const str
+ {
+       unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+ 
+-      return mtu ? : dst->dev->mtu;
++      return mtu ? : dst_dev(dst)->mtu;
+ }
+ EXPORT_SYMBOL_GPL(dst_blackhole_mtu);
+ 
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -2534,8 +2534,8 @@ static u32 sk_dst_gso_max_size(struct so
+                  !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr));
+ #endif
+       /* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
+-      max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) :
+-                      READ_ONCE(dst->dev->gso_ipv4_max_size);
++      max_size = is_ipv6 ? READ_ONCE(dst_dev(dst)->gso_max_size) :
++                      READ_ONCE(dst_dev(dst)->gso_ipv4_max_size);
+       if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
+               max_size = GSO_LEGACY_MAX_SIZE;
+ 
+@@ -2546,7 +2546,7 @@ void sk_setup_caps(struct sock *sk, stru
+ {
+       u32 max_segs = 1;
+ 
+-      sk->sk_route_caps = dst->dev->features;
++      sk->sk_route_caps = dst_dev(dst)->features;
+       if (sk_is_tcp(sk)) {
+               struct inet_connection_sock *icsk = inet_csk(sk);
+ 
+@@ -2564,7 +2564,7 @@ void sk_setup_caps(struct sock *sk, stru
+                       sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
+                       sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
+                       /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
+-                      max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
++                      max_segs = max_t(u32, READ_ONCE(dst_dev(dst)->gso_max_segs), 1);
+               }
+       }
+       sk->sk_gso_max_segs = max_segs;
diff --git a/queue-6.12/nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch b/queue-6.12/nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch

new file mode 100644 (file)

index 0000000..0ed4907
--- /dev/null
+++ b/queue-6.12/nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch
@@ -0,0 +1,49 @@
+From stable+bounces-188247-greg=kroah.com@vger.kernel.org Mon Oct 20 22:33:56 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 16:33:43 -0400
+Subject: NFSD: Define a proc_layoutcommit for the FlexFiles layout type
+To: stable@vger.kernel.org
+Cc: Chuck Lever <chuck.lever@oracle.com>, Robert Morris <rtm@csail.mit.edu>, Thomas Haynes <loghyr@hammerspace.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020203343.1907954-5-sashal@kernel.org>
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 4b47a8601b71ad98833b447d465592d847b4dc77 ]
+
+Avoid a crash if a pNFS client should happen to send a LAYOUTCOMMIT
+operation on a FlexFiles layout.
+
+Reported-by: Robert Morris <rtm@csail.mit.edu>
+Closes: https://lore.kernel.org/linux-nfs/152f99b2-ba35-4dec-93a9-4690e625dccd@oracle.com/T/#t
+Cc: Thomas Haynes <loghyr@hammerspace.com>
+Cc: stable@vger.kernel.org
+Fixes: 9b9960a0ca47 ("nfsd: Add a super simple flex file server")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/flexfilelayout.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/nfsd/flexfilelayout.c
++++ b/fs/nfsd/flexfilelayout.c
+@@ -125,6 +125,13 @@ nfsd4_ff_proc_getdeviceinfo(struct super
+       return 0;
+ }
+ 
++static __be32
++nfsd4_ff_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
++              struct nfsd4_layoutcommit *lcp)
++{
++      return nfs_ok;
++}
++
+ const struct nfsd4_layout_ops ff_layout_ops = {
+       .notify_types           =
+                       NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
+@@ -133,4 +140,5 @@ const struct nfsd4_layout_ops ff_layout_
+       .encode_getdeviceinfo   = nfsd4_ff_encode_getdeviceinfo,
+       .proc_layoutget         = nfsd4_ff_proc_layoutget,
+       .encode_layoutget       = nfsd4_ff_encode_layoutget,
++      .proc_layoutcommit      = nfsd4_ff_proc_layoutcommit,
+ };
diff --git a/queue-6.12/nfsd-drop-dprintk-in-blocklayout-xdr-functions.patch b/queue-6.12/nfsd-drop-dprintk-in-blocklayout-xdr-functions.patch

new file mode 100644 (file)

index 0000000..d17f52b
--- /dev/null
+++ b/queue-6.12/nfsd-drop-dprintk-in-blocklayout-xdr-functions.patch
@@ -0,0 +1,129 @@
+From stable+bounces-188066-greg=kroah.com@vger.kernel.org Mon Oct 20 14:53:20 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 08:53:00 -0400
+Subject: nfsd: Drop dprintk in blocklayout xdr functions
+To: stable@vger.kernel.org
+Cc: Sergey Bashirov <sergeybashirov@gmail.com>, Jeff Layton <jlayton@kernel.org>, Christoph Hellwig <hch@lst.de>, Chuck Lever <chuck.lever@oracle.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020125305.1760219-2-sashal@kernel.org>
+
+From: Sergey Bashirov <sergeybashirov@gmail.com>
+
+[ Upstream commit e339967eecf1305557f7c697e1bc10b5cc495454 ]
+
+Minor clean up. Instead of dprintk there are appropriate error codes.
+
+Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: d68886bae76a ("NFSD: Fix last write offset handling in layoutcommit")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/blocklayoutxdr.c |   40 +++++++---------------------------------
+ 1 file changed, 7 insertions(+), 33 deletions(-)
+
+--- a/fs/nfsd/blocklayoutxdr.c
++++ b/fs/nfsd/blocklayoutxdr.c
+@@ -139,28 +139,19 @@ nfsd4_block_decode_layoutupdate(__be32 *
+       struct iomap *iomaps;
+       u32 nr_iomaps, i;
+ 
+-      if (len < sizeof(u32)) {
+-              dprintk("%s: extent array too small: %u\n", __func__, len);
++      if (len < sizeof(u32))
+               return nfserr_bad_xdr;
+-      }
+       len -= sizeof(u32);
+-      if (len % PNFS_BLOCK_EXTENT_SIZE) {
+-              dprintk("%s: extent array invalid: %u\n", __func__, len);
++      if (len % PNFS_BLOCK_EXTENT_SIZE)
+               return nfserr_bad_xdr;
+-      }
+ 
+       nr_iomaps = be32_to_cpup(p++);
+-      if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) {
+-              dprintk("%s: extent array size mismatch: %u/%u\n",
+-                      __func__, len, nr_iomaps);
++      if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE)
+               return nfserr_bad_xdr;
+-      }
+ 
+       iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
+-      if (!iomaps) {
+-              dprintk("%s: failed to allocate extent array\n", __func__);
++      if (!iomaps)
+               return nfserr_delay;
+-      }
+ 
+       for (i = 0; i < nr_iomaps; i++) {
+               struct pnfs_block_extent bex;
+@@ -170,26 +161,18 @@ nfsd4_block_decode_layoutupdate(__be32 *
+ 
+               p = xdr_decode_hyper(p, &bex.foff);
+               if (bex.foff & (block_size - 1)) {
+-                      dprintk("%s: unaligned offset 0x%llx\n",
+-                              __func__, bex.foff);
+                       goto fail;
+               }
+               p = xdr_decode_hyper(p, &bex.len);
+               if (bex.len & (block_size - 1)) {
+-                      dprintk("%s: unaligned length 0x%llx\n",
+-                              __func__, bex.foff);
+                       goto fail;
+               }
+               p = xdr_decode_hyper(p, &bex.soff);
+               if (bex.soff & (block_size - 1)) {
+-                      dprintk("%s: unaligned disk offset 0x%llx\n",
+-                              __func__, bex.soff);
+                       goto fail;
+               }
+               bex.es = be32_to_cpup(p++);
+               if (bex.es != PNFS_BLOCK_READWRITE_DATA) {
+-                      dprintk("%s: incorrect extent state %d\n",
+-                              __func__, bex.es);
+                       goto fail;
+               }
+ 
+@@ -231,38 +214,29 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p
+       struct iomap *iomaps;
+       u32 nr_iomaps, expected, i;
+ 
+-      if (len < sizeof(u32)) {
+-              dprintk("%s: extent array too small: %u\n", __func__, len);
++      if (len < sizeof(u32))
+               return nfserr_bad_xdr;
+-      }
+ 
+       nr_iomaps = be32_to_cpup(p++);
+       expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE;
+-      if (len != expected) {
+-              dprintk("%s: extent array size mismatch: %u/%u\n",
+-                      __func__, len, expected);
++      if (len != expected)
+               return nfserr_bad_xdr;
+-      }
+ 
+       iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
+-      if (!iomaps) {
+-              dprintk("%s: failed to allocate extent array\n", __func__);
++      if (!iomaps)
+               return nfserr_delay;
+-      }
+ 
+       for (i = 0; i < nr_iomaps; i++) {
+               u64 val;
+ 
+               p = xdr_decode_hyper(p, &val);
+               if (val & (block_size - 1)) {
+-                      dprintk("%s: unaligned offset 0x%llx\n", __func__, val);
+                       goto fail;
+               }
+               iomaps[i].offset = val;
+ 
+               p = xdr_decode_hyper(p, &val);
+               if (val & (block_size - 1)) {
+-                      dprintk("%s: unaligned length 0x%llx\n", __func__, val);
+                       goto fail;
+               }
+               iomaps[i].length = val;
diff --git a/queue-6.12/nfsd-fix-last-write-offset-handling-in-layoutcommit.patch b/queue-6.12/nfsd-fix-last-write-offset-handling-in-layoutcommit.patch

new file mode 100644 (file)

index 0000000..83dcc5c
--- /dev/null
+++ b/queue-6.12/nfsd-fix-last-write-offset-handling-in-layoutcommit.patch
@@ -0,0 +1,113 @@
+From stable+bounces-188070-greg=kroah.com@vger.kernel.org Mon Oct 20 14:53:46 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 08:53:04 -0400
+Subject: NFSD: Fix last write offset handling in layoutcommit
+To: stable@vger.kernel.org
+Cc: Sergey Bashirov <sergeybashirov@gmail.com>, Konstantin Evtushenko <koevtushenko@yandex.com>, Christoph Hellwig <hch@lst.de>, Jeff Layton <jlayton@kernel.org>, Chuck Lever <chuck.lever@oracle.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020125305.1760219-6-sashal@kernel.org>
+
+From: Sergey Bashirov <sergeybashirov@gmail.com>
+
+[ Upstream commit d68886bae76a4b9b3484d23e5b7df086f940fa38 ]
+
+The data type of loca_last_write_offset is newoffset4 and is switched
+on a boolean value, no_newoffset, that indicates if a previous write
+occurred or not. If no_newoffset is FALSE, an offset is not given.
+This means that the client does not try to update the file size. Thus,
+the server should not try to calculate a new file size and check if it
+fits into the segment range. See RFC 8881, section 12.5.4.2.
+
+Sometimes the current incorrect logic may cause clients to hang when
+trying to sync an inode. If layoutcommit fails, the client marks the
+inode as dirty again.
+
+Fixes: 9cf514ccfacb ("nfsd: implement pNFS operations")
+Cc: stable@vger.kernel.org
+Co-developed-by: Konstantin Evtushenko <koevtushenko@yandex.com>
+Signed-off-by: Konstantin Evtushenko <koevtushenko@yandex.com>
+Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/blocklayout.c |    5 ++---
+ fs/nfsd/nfs4proc.c    |   30 +++++++++++++++---------------
+ 2 files changed, 17 insertions(+), 18 deletions(-)
+
+--- a/fs/nfsd/blocklayout.c
++++ b/fs/nfsd/blocklayout.c
+@@ -118,7 +118,6 @@ nfsd4_block_commit_blocks(struct inode *
+               struct iomap *iomaps, int nr_iomaps)
+ {
+       struct timespec64 mtime = inode_get_mtime(inode);
+-      loff_t new_size = lcp->lc_last_wr + 1;
+       struct iattr iattr = { .ia_valid = 0 };
+       int error;
+ 
+@@ -128,9 +127,9 @@ nfsd4_block_commit_blocks(struct inode *
+       iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME;
+       iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime;
+ 
+-      if (new_size > i_size_read(inode)) {
++      if (lcp->lc_size_chg) {
+               iattr.ia_valid |= ATTR_SIZE;
+-              iattr.ia_size = new_size;
++              iattr.ia_size = lcp->lc_newsize;
+       }
+ 
+       error = inode->i_sb->s_export_op->commit_blocks(inode, iomaps,
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -2362,7 +2362,6 @@ nfsd4_layoutcommit(struct svc_rqst *rqst
+       const struct nfsd4_layout_seg *seg = &lcp->lc_seg;
+       struct svc_fh *current_fh = &cstate->current_fh;
+       const struct nfsd4_layout_ops *ops;
+-      loff_t new_size = lcp->lc_last_wr + 1;
+       struct inode *inode;
+       struct nfs4_layout_stateid *ls;
+       __be32 nfserr;
+@@ -2378,13 +2377,21 @@ nfsd4_layoutcommit(struct svc_rqst *rqst
+               goto out;
+       inode = d_inode(current_fh->fh_dentry);
+ 
+-      nfserr = nfserr_inval;
+-      if (new_size <= seg->offset)
+-              goto out;
+-      if (new_size > seg->offset + seg->length)
+-              goto out;
+-      if (!lcp->lc_newoffset && new_size > i_size_read(inode))
+-              goto out;
++      lcp->lc_size_chg = false;
++      if (lcp->lc_newoffset) {
++              loff_t new_size = lcp->lc_last_wr + 1;
++
++              nfserr = nfserr_inval;
++              if (new_size <= seg->offset)
++                      goto out;
++              if (new_size > seg->offset + seg->length)
++                      goto out;
++
++              if (new_size > i_size_read(inode)) {
++                      lcp->lc_size_chg = true;
++                      lcp->lc_newsize = new_size;
++              }
++      }
+ 
+       nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid,
+                                               false, lcp->lc_layout_type,
+@@ -2400,13 +2407,6 @@ nfsd4_layoutcommit(struct svc_rqst *rqst
+       /* LAYOUTCOMMIT does not require any serialization */
+       mutex_unlock(&ls->ls_mutex);
+ 
+-      if (new_size > i_size_read(inode)) {
+-              lcp->lc_size_chg = true;
+-              lcp->lc_newsize = new_size;
+-      } else {
+-              lcp->lc_size_chg = false;
+-      }
+-
+       nfserr = ops->proc_layoutcommit(inode, rqstp, lcp);
+       nfs4_put_stid(&ls->ls_stid);
+ out:
diff --git a/queue-6.12/nfsd-implement-large-extent-array-support-in-pnfs.patch b/queue-6.12/nfsd-implement-large-extent-array-support-in-pnfs.patch

new file mode 100644 (file)

index 0000000..6548752
--- /dev/null
+++ b/queue-6.12/nfsd-implement-large-extent-array-support-in-pnfs.patch
@@ -0,0 +1,335 @@
+From stable+bounces-188069-greg=kroah.com@vger.kernel.org Mon Oct 20 14:53:26 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 08:53:03 -0400
+Subject: NFSD: Implement large extent array support in pNFS
+To: stable@vger.kernel.org
+Cc: Sergey Bashirov <sergeybashirov@gmail.com>, Konstantin Evtushenko <koevtushenko@yandex.com>, Jeff Layton <jlayton@kernel.org>, Christoph Hellwig <hch@lst.de>, Chuck Lever <chuck.lever@oracle.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020125305.1760219-5-sashal@kernel.org>
+
+From: Sergey Bashirov <sergeybashirov@gmail.com>
+
+[ Upstream commit f963cf2b91a30b5614c514f3ad53ca124cb65280 ]
+
+When a pNFS client in block or scsi layout mode sends a layoutcommit
+to the MDS, a variable-length array of modified extents is supplied in
+the request. This patch allows the server to accept such extent arrays
+even if they do not fit within a single memory page.
+
+The issue can be reproduced when writing to a 1GB file using FIO with
+O_DIRECT, 4K block and large I/O depth without preallocation of the
+file. In this case, the server returns NFSERR_BADXDR to the client.
+
+Co-developed-by: Konstantin Evtushenko <koevtushenko@yandex.com>
+Signed-off-by: Konstantin Evtushenko <koevtushenko@yandex.com>
+Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: d68886bae76a ("NFSD: Fix last write offset handling in layoutcommit")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/blocklayout.c    |   20 ++++++-----
+ fs/nfsd/blocklayoutxdr.c |   83 +++++++++++++++++++++++++++++++----------------
+ fs/nfsd/blocklayoutxdr.h |    4 +-
+ fs/nfsd/nfs4proc.c       |    2 -
+ fs/nfsd/nfs4xdr.c        |   11 ++----
+ fs/nfsd/pnfs.h           |    1 
+ fs/nfsd/xdr4.h           |    3 -
+ 7 files changed, 78 insertions(+), 46 deletions(-)
+
+--- a/fs/nfsd/blocklayout.c
++++ b/fs/nfsd/blocklayout.c
+@@ -173,16 +173,18 @@ nfsd4_block_proc_getdeviceinfo(struct su
+ }
+ 
+ static __be32
+-nfsd4_block_proc_layoutcommit(struct inode *inode,
++nfsd4_block_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
+               struct nfsd4_layoutcommit *lcp)
+ {
+       struct iomap *iomaps;
+       int nr_iomaps;
+       __be32 nfserr;
+ 
+-      nfserr = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
+-                      lcp->lc_up_len, &iomaps, &nr_iomaps,
+-                      i_blocksize(inode));
++      rqstp->rq_arg = lcp->lc_up_layout;
++      svcxdr_init_decode(rqstp);
++
++      nfserr = nfsd4_block_decode_layoutupdate(&rqstp->rq_arg_stream,
++                      &iomaps, &nr_iomaps, i_blocksize(inode));
+       if (nfserr != nfs_ok)
+               return nfserr;
+ 
+@@ -313,16 +315,18 @@ nfsd4_scsi_proc_getdeviceinfo(struct sup
+       return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
+ }
+ static __be32
+-nfsd4_scsi_proc_layoutcommit(struct inode *inode,
++nfsd4_scsi_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
+               struct nfsd4_layoutcommit *lcp)
+ {
+       struct iomap *iomaps;
+       int nr_iomaps;
+       __be32 nfserr;
+ 
+-      nfserr = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
+-                      lcp->lc_up_len, &iomaps, &nr_iomaps,
+-                      i_blocksize(inode));
++      rqstp->rq_arg = lcp->lc_up_layout;
++      svcxdr_init_decode(rqstp);
++
++      nfserr = nfsd4_scsi_decode_layoutupdate(&rqstp->rq_arg_stream,
++                      &iomaps, &nr_iomaps, i_blocksize(inode));
+       if (nfserr != nfs_ok)
+               return nfserr;
+ 
+--- a/fs/nfsd/blocklayoutxdr.c
++++ b/fs/nfsd/blocklayoutxdr.c
+@@ -113,8 +113,7 @@ nfsd4_block_encode_getdeviceinfo(struct
+ 
+ /**
+  * nfsd4_block_decode_layoutupdate - decode the block layout extent array
+- * @p: pointer to the xdr data
+- * @len: number of bytes to decode
++ * @xdr: subbuf set to the encoded array
+  * @iomapp: pointer to store the decoded extent array
+  * @nr_iomapsp: pointer to store the number of extents
+  * @block_size: alignment of extent offset and length
+@@ -127,25 +126,24 @@ nfsd4_block_encode_getdeviceinfo(struct
+  *
+  * Return values:
+  *   %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
+- *   %nfserr_bad_xdr: The encoded array in @p is invalid
++ *   %nfserr_bad_xdr: The encoded array in @xdr is invalid
+  *   %nfserr_inval: An unaligned extent found
+  *   %nfserr_delay: Failed to allocate memory for @iomapp
+  */
+ __be32
+-nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
++nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp,
+               int *nr_iomapsp, u32 block_size)
+ {
+       struct iomap *iomaps;
+-      u32 nr_iomaps, i;
++      u32 nr_iomaps, expected, len, i;
++      __be32 nfserr;
+ 
+-      if (len < sizeof(u32))
+-              return nfserr_bad_xdr;
+-      len -= sizeof(u32);
+-      if (len % PNFS_BLOCK_EXTENT_SIZE)
++      if (xdr_stream_decode_u32(xdr, &nr_iomaps))
+               return nfserr_bad_xdr;
+ 
+-      nr_iomaps = be32_to_cpup(p++);
+-      if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE)
++      len = sizeof(__be32) + xdr_stream_remaining(xdr);
++      expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE;
++      if (len != expected)
+               return nfserr_bad_xdr;
+ 
+       iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
+@@ -155,21 +153,44 @@ nfsd4_block_decode_layoutupdate(__be32 *
+       for (i = 0; i < nr_iomaps; i++) {
+               struct pnfs_block_extent bex;
+ 
+-              p = svcxdr_decode_deviceid4(p, &bex.vol_id);
+-              p = xdr_decode_hyper(p, &bex.foff);
++              if (nfsd4_decode_deviceid4(xdr, &bex.vol_id)) {
++                      nfserr = nfserr_bad_xdr;
++                      goto fail;
++              }
++
++              if (xdr_stream_decode_u64(xdr, &bex.foff)) {
++                      nfserr = nfserr_bad_xdr;
++                      goto fail;
++              }
+               if (bex.foff & (block_size - 1)) {
++                      nfserr = nfserr_inval;
++                      goto fail;
++              }
++
++              if (xdr_stream_decode_u64(xdr, &bex.len)) {
++                      nfserr = nfserr_bad_xdr;
+                       goto fail;
+               }
+-              p = xdr_decode_hyper(p, &bex.len);
+               if (bex.len & (block_size - 1)) {
++                      nfserr = nfserr_inval;
++                      goto fail;
++              }
++
++              if (xdr_stream_decode_u64(xdr, &bex.soff)) {
++                      nfserr = nfserr_bad_xdr;
+                       goto fail;
+               }
+-              p = xdr_decode_hyper(p, &bex.soff);
+               if (bex.soff & (block_size - 1)) {
++                      nfserr = nfserr_inval;
++                      goto fail;
++              }
++
++              if (xdr_stream_decode_u32(xdr, &bex.es)) {
++                      nfserr = nfserr_bad_xdr;
+                       goto fail;
+               }
+-              bex.es = be32_to_cpup(p++);
+               if (bex.es != PNFS_BLOCK_READWRITE_DATA) {
++                      nfserr = nfserr_inval;
+                       goto fail;
+               }
+ 
+@@ -182,13 +203,12 @@ nfsd4_block_decode_layoutupdate(__be32 *
+       return nfs_ok;
+ fail:
+       kfree(iomaps);
+-      return nfserr_inval;
++      return nfserr;
+ }
+ 
+ /**
+  * nfsd4_scsi_decode_layoutupdate - decode the scsi layout extent array
+- * @p: pointer to the xdr data
+- * @len: number of bytes to decode
++ * @xdr: subbuf set to the encoded array
+  * @iomapp: pointer to store the decoded extent array
+  * @nr_iomapsp: pointer to store the number of extents
+  * @block_size: alignment of extent offset and length
+@@ -200,21 +220,22 @@ fail:
+  *
+  * Return values:
+  *   %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
+- *   %nfserr_bad_xdr: The encoded array in @p is invalid
++ *   %nfserr_bad_xdr: The encoded array in @xdr is invalid
+  *   %nfserr_inval: An unaligned extent found
+  *   %nfserr_delay: Failed to allocate memory for @iomapp
+  */
+ __be32
+-nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
++nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp,
+               int *nr_iomapsp, u32 block_size)
+ {
+       struct iomap *iomaps;
+-      u32 nr_iomaps, expected, i;
++      u32 nr_iomaps, expected, len, i;
++      __be32 nfserr;
+ 
+-      if (len < sizeof(u32))
++      if (xdr_stream_decode_u32(xdr, &nr_iomaps))
+               return nfserr_bad_xdr;
+ 
+-      nr_iomaps = be32_to_cpup(p++);
++      len = sizeof(__be32) + xdr_stream_remaining(xdr);
+       expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE;
+       if (len != expected)
+               return nfserr_bad_xdr;
+@@ -226,14 +247,22 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p
+       for (i = 0; i < nr_iomaps; i++) {
+               u64 val;
+ 
+-              p = xdr_decode_hyper(p, &val);
++              if (xdr_stream_decode_u64(xdr, &val)) {
++                      nfserr = nfserr_bad_xdr;
++                      goto fail;
++              }
+               if (val & (block_size - 1)) {
++                      nfserr = nfserr_inval;
+                       goto fail;
+               }
+               iomaps[i].offset = val;
+ 
+-              p = xdr_decode_hyper(p, &val);
++              if (xdr_stream_decode_u64(xdr, &val)) {
++                      nfserr = nfserr_bad_xdr;
++                      goto fail;
++              }
+               if (val & (block_size - 1)) {
++                      nfserr = nfserr_inval;
+                       goto fail;
+               }
+               iomaps[i].length = val;
+@@ -244,5 +273,5 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p
+       return nfs_ok;
+ fail:
+       kfree(iomaps);
+-      return nfserr_inval;
++      return nfserr;
+ }
+--- a/fs/nfsd/blocklayoutxdr.h
++++ b/fs/nfsd/blocklayoutxdr.h
+@@ -54,9 +54,9 @@ __be32 nfsd4_block_encode_getdeviceinfo(
+               const struct nfsd4_getdeviceinfo *gdp);
+ __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
+               const struct nfsd4_layoutget *lgp);
+-__be32 nfsd4_block_decode_layoutupdate(__be32 *p, u32 len,
++__be32 nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr,
+               struct iomap **iomapp, int *nr_iomapsp, u32 block_size);
+-__be32 nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len,
++__be32 nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr,
+               struct iomap **iomapp, int *nr_iomapsp, u32 block_size);
+ 
+ #endif /* _NFSD_BLOCKLAYOUTXDR_H */
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -2407,7 +2407,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqst
+               lcp->lc_size_chg = false;
+       }
+ 
+-      nfserr = ops->proc_layoutcommit(inode, lcp);
++      nfserr = ops->proc_layoutcommit(inode, rqstp, lcp);
+       nfs4_put_stid(&ls->ls_stid);
+ out:
+       return nfserr;
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -571,6 +571,8 @@ static __be32
+ nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp,
+                          struct nfsd4_layoutcommit *lcp)
+ {
++      u32 len;
++
+       if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_layout_type) < 0)
+               return nfserr_bad_xdr;
+       if (lcp->lc_layout_type < LAYOUT_NFSV4_1_FILES)
+@@ -578,13 +580,10 @@ nfsd4_decode_layoutupdate4(struct nfsd4_
+       if (lcp->lc_layout_type >= LAYOUT_TYPE_MAX)
+               return nfserr_bad_xdr;
+ 
+-      if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_up_len) < 0)
++      if (xdr_stream_decode_u32(argp->xdr, &len) < 0)
++              return nfserr_bad_xdr;
++      if (!xdr_stream_subsegment(argp->xdr, &lcp->lc_up_layout, len))
+               return nfserr_bad_xdr;
+-      if (lcp->lc_up_len > 0) {
+-              lcp->lc_up_layout = xdr_inline_decode(argp->xdr, lcp->lc_up_len);
+-              if (!lcp->lc_up_layout)
+-                      return nfserr_bad_xdr;
+-      }
+ 
+       return nfs_ok;
+ }
+--- a/fs/nfsd/pnfs.h
++++ b/fs/nfsd/pnfs.h
+@@ -35,6 +35,7 @@ struct nfsd4_layout_ops {
+                       const struct nfsd4_layoutget *lgp);
+ 
+       __be32 (*proc_layoutcommit)(struct inode *inode,
++                      struct svc_rqst *rqstp,
+                       struct nfsd4_layoutcommit *lcp);
+ 
+       void (*fence_client)(struct nfs4_layout_stateid *ls,
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -665,8 +665,7 @@ struct nfsd4_layoutcommit {
+       u64                     lc_last_wr;     /* request */
+       struct timespec64       lc_mtime;       /* request */
+       u32                     lc_layout_type; /* request */
+-      u32                     lc_up_len;      /* layout length */
+-      void                    *lc_up_layout;  /* decoded by callback */
++      struct xdr_buf          lc_up_layout;   /* decoded by callback */
+       bool                    lc_size_chg;    /* response */
+       u64                     lc_newsize;     /* response */
+ };
diff --git a/queue-6.12/nfsd-minor-cleanup-in-layoutcommit-processing.patch b/queue-6.12/nfsd-minor-cleanup-in-layoutcommit-processing.patch

new file mode 100644 (file)

index 0000000..a56be05
--- /dev/null
+++ b/queue-6.12/nfsd-minor-cleanup-in-layoutcommit-processing.patch
@@ -0,0 +1,50 @@
+From stable+bounces-188068-greg=kroah.com@vger.kernel.org Mon Oct 20 14:53:43 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 08:53:02 -0400
+Subject: NFSD: Minor cleanup in layoutcommit processing
+To: stable@vger.kernel.org
+Cc: Sergey Bashirov <sergeybashirov@gmail.com>, Christoph Hellwig <hch@lst.de>, Chuck Lever <chuck.lever@oracle.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020125305.1760219-4-sashal@kernel.org>
+
+From: Sergey Bashirov <sergeybashirov@gmail.com>
+
+[ Upstream commit 274365a51d88658fb51cca637ba579034e90a799 ]
+
+Remove dprintk in nfsd4_layoutcommit. These are not needed
+in day to day usage, and the information is also available
+in Wireshark when capturing NFS traffic.
+
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: d68886bae76a ("NFSD: Fix last write offset handling in layoutcommit")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs4proc.c |   12 +++---------
+ 1 file changed, 3 insertions(+), 9 deletions(-)
+
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -2379,18 +2379,12 @@ nfsd4_layoutcommit(struct svc_rqst *rqst
+       inode = d_inode(current_fh->fh_dentry);
+ 
+       nfserr = nfserr_inval;
+-      if (new_size <= seg->offset) {
+-              dprintk("pnfsd: last write before layout segment\n");
++      if (new_size <= seg->offset)
+               goto out;
+-      }
+-      if (new_size > seg->offset + seg->length) {
+-              dprintk("pnfsd: last write beyond layout segment\n");
++      if (new_size > seg->offset + seg->length)
+               goto out;
+-      }
+-      if (!lcp->lc_newoffset && new_size > i_size_read(inode)) {
+-              dprintk("pnfsd: layoutcommit beyond EOF\n");
++      if (!lcp->lc_newoffset && new_size > i_size_read(inode))
+               goto out;
+-      }
+ 
+       nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid,
+                                               false, lcp->lc_layout_type,
diff --git a/queue-6.12/nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch b/queue-6.12/nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch

new file mode 100644 (file)

index 0000000..21818b5
--- /dev/null
+++ b/queue-6.12/nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch
@@ -0,0 +1,156 @@
+From stable+bounces-188067-greg=kroah.com@vger.kernel.org Mon Oct 20 14:53:26 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 08:53:01 -0400
+Subject: NFSD: Rework encoding and decoding of nfsd4_deviceid
+To: stable@vger.kernel.org
+Cc: Sergey Bashirov <sergeybashirov@gmail.com>, Chuck Lever <chuck.lever@oracle.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020125305.1760219-3-sashal@kernel.org>
+
+From: Sergey Bashirov <sergeybashirov@gmail.com>
+
+[ Upstream commit 832738e4b325b742940761e10487403f9aad13e8 ]
+
+Compilers may optimize the layout of C structures, so we should not rely
+on sizeof struct and memcpy to encode and decode XDR structures. The byte
+order of the fields should also be taken into account.
+
+This patch adds the correct functions to handle the deviceid4 structure
+and removes the pad field, which is currently not used by NFSD, from the
+runtime state. The server's byte order is preserved because the deviceid4
+blob on the wire is only used as a cookie by the client.
+
+Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: d68886bae76a ("NFSD: Fix last write offset handling in layoutcommit")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/blocklayoutxdr.c    |    7 ++-----
+ fs/nfsd/flexfilelayoutxdr.c |    3 +--
+ fs/nfsd/nfs4layouts.c       |    1 -
+ fs/nfsd/nfs4xdr.c           |   14 +-------------
+ fs/nfsd/xdr4.h              |   36 +++++++++++++++++++++++++++++++++++-
+ 5 files changed, 39 insertions(+), 22 deletions(-)
+
+--- a/fs/nfsd/blocklayoutxdr.c
++++ b/fs/nfsd/blocklayoutxdr.c
+@@ -29,8 +29,7 @@ nfsd4_block_encode_layoutget(struct xdr_
+       *p++ = cpu_to_be32(len);
+       *p++ = cpu_to_be32(1);          /* we always return a single extent */
+ 
+-      p = xdr_encode_opaque_fixed(p, &b->vol_id,
+-                      sizeof(struct nfsd4_deviceid));
++      p = svcxdr_encode_deviceid4(p, &b->vol_id);
+       p = xdr_encode_hyper(p, b->foff);
+       p = xdr_encode_hyper(p, b->len);
+       p = xdr_encode_hyper(p, b->soff);
+@@ -156,9 +155,7 @@ nfsd4_block_decode_layoutupdate(__be32 *
+       for (i = 0; i < nr_iomaps; i++) {
+               struct pnfs_block_extent bex;
+ 
+-              memcpy(&bex.vol_id, p, sizeof(struct nfsd4_deviceid));
+-              p += XDR_QUADLEN(sizeof(struct nfsd4_deviceid));
+-
++              p = svcxdr_decode_deviceid4(p, &bex.vol_id);
+               p = xdr_decode_hyper(p, &bex.foff);
+               if (bex.foff & (block_size - 1)) {
+                       goto fail;
+--- a/fs/nfsd/flexfilelayoutxdr.c
++++ b/fs/nfsd/flexfilelayoutxdr.c
+@@ -54,8 +54,7 @@ nfsd4_ff_encode_layoutget(struct xdr_str
+       *p++ = cpu_to_be32(1);                  /* single mirror */
+       *p++ = cpu_to_be32(1);                  /* single data server */
+ 
+-      p = xdr_encode_opaque_fixed(p, &fl->deviceid,
+-                      sizeof(struct nfsd4_deviceid));
++      p = svcxdr_encode_deviceid4(p, &fl->deviceid);
+ 
+       *p++ = cpu_to_be32(1);                  /* efficiency */
+ 
+--- a/fs/nfsd/nfs4layouts.c
++++ b/fs/nfsd/nfs4layouts.c
+@@ -120,7 +120,6 @@ nfsd4_set_deviceid(struct nfsd4_deviceid
+ 
+       id->fsid_idx = fhp->fh_export->ex_devid_map->idx;
+       id->generation = device_generation;
+-      id->pad = 0;
+       return 0;
+ }
+ 
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -566,18 +566,6 @@ nfsd4_decode_state_owner4(struct nfsd4_c
+ }
+ 
+ #ifdef CONFIG_NFSD_PNFS
+-static __be32
+-nfsd4_decode_deviceid4(struct nfsd4_compoundargs *argp,
+-                     struct nfsd4_deviceid *devid)
+-{
+-      __be32 *p;
+-
+-      p = xdr_inline_decode(argp->xdr, NFS4_DEVICEID4_SIZE);
+-      if (!p)
+-              return nfserr_bad_xdr;
+-      memcpy(devid, p, sizeof(*devid));
+-      return nfs_ok;
+-}
+ 
+ static __be32
+ nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp,
+@@ -1762,7 +1750,7 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_
+       __be32 status;
+ 
+       memset(gdev, 0, sizeof(*gdev));
+-      status = nfsd4_decode_deviceid4(argp, &gdev->gd_devid);
++      status = nfsd4_decode_deviceid4(argp->xdr, &gdev->gd_devid);
+       if (status)
+               return status;
+       if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_layout_type) < 0)
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -596,9 +596,43 @@ struct nfsd4_reclaim_complete {
+ struct nfsd4_deviceid {
+       u64                     fsid_idx;
+       u32                     generation;
+-      u32                     pad;
+ };
+ 
++static inline __be32 *
++svcxdr_encode_deviceid4(__be32 *p, const struct nfsd4_deviceid *devid)
++{
++      __be64 *q = (__be64 *)p;
++
++      *q = (__force __be64)devid->fsid_idx;
++      p += 2;
++      *p++ = (__force __be32)devid->generation;
++      *p++ = xdr_zero;
++      return p;
++}
++
++static inline __be32 *
++svcxdr_decode_deviceid4(__be32 *p, struct nfsd4_deviceid *devid)
++{
++      __be64 *q = (__be64 *)p;
++
++      devid->fsid_idx = (__force u64)(*q);
++      p += 2;
++      devid->generation = (__force u32)(*p++);
++      p++; /* NFSD does not use the remaining octets */
++      return p;
++}
++
++static inline __be32
++nfsd4_decode_deviceid4(struct xdr_stream *xdr, struct nfsd4_deviceid *devid)
++{
++      __be32 *p = xdr_inline_decode(xdr, NFS4_DEVICEID4_SIZE);
++
++      if (unlikely(!p))
++              return nfserr_bad_xdr;
++      svcxdr_decode_deviceid4(p, devid);
++      return nfs_ok;
++}
++
+ struct nfsd4_layout_seg {
+       u32                     iomode;
+       u64                     offset;
diff --git a/queue-6.12/nfsd-use-correct-error-code-when-decoding-extents.patch b/queue-6.12/nfsd-use-correct-error-code-when-decoding-extents.patch

new file mode 100644 (file)

index 0000000..85e40e5
--- /dev/null
+++ b/queue-6.12/nfsd-use-correct-error-code-when-decoding-extents.patch
@@ -0,0 +1,234 @@
+From stable+bounces-188065-greg=kroah.com@vger.kernel.org Mon Oct 20 14:53:24 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 08:52:59 -0400
+Subject: nfsd: Use correct error code when decoding extents
+To: stable@vger.kernel.org
+Cc: Sergey Bashirov <sergeybashirov@gmail.com>, Christoph Hellwig <hch@lst.de>, Chuck Lever <chuck.lever@oracle.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020125305.1760219-1-sashal@kernel.org>
+
+From: Sergey Bashirov <sergeybashirov@gmail.com>
+
+[ Upstream commit 26d05e1c37d276905bc921384b5a75158fca284b ]
+
+Update error codes in decoding functions of block and scsi layout
+drivers to match the core nfsd code. NFS4ERR_INVAL means that the
+server was able to decode the request, but the decoded values are
+invalid. Use NFS4ERR_BADXDR instead to indicate a decoding error.
+ENOMEM is now mapped to the NFS status code NFS4ERR_DELAY.
+
+Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Stable-dep-of: d68886bae76a ("NFSD: Fix last write offset handling in layoutcommit")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/blocklayout.c    |   20 +++++++------
+ fs/nfsd/blocklayoutxdr.c |   71 +++++++++++++++++++++++++++++++++++++----------
+ fs/nfsd/blocklayoutxdr.h |    8 ++---
+ fs/nfsd/nfsd.h           |    1 
+ 4 files changed, 73 insertions(+), 27 deletions(-)
+
+--- a/fs/nfsd/blocklayout.c
++++ b/fs/nfsd/blocklayout.c
+@@ -178,11 +178,13 @@ nfsd4_block_proc_layoutcommit(struct ino
+ {
+       struct iomap *iomaps;
+       int nr_iomaps;
++      __be32 nfserr;
+ 
+-      nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
+-                      lcp->lc_up_len, &iomaps, i_blocksize(inode));
+-      if (nr_iomaps < 0)
+-              return nfserrno(nr_iomaps);
++      nfserr = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
++                      lcp->lc_up_len, &iomaps, &nr_iomaps,
++                      i_blocksize(inode));
++      if (nfserr != nfs_ok)
++              return nfserr;
+ 
+       return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
+ }
+@@ -316,11 +318,13 @@ nfsd4_scsi_proc_layoutcommit(struct inod
+ {
+       struct iomap *iomaps;
+       int nr_iomaps;
++      __be32 nfserr;
+ 
+-      nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
+-                      lcp->lc_up_len, &iomaps, i_blocksize(inode));
+-      if (nr_iomaps < 0)
+-              return nfserrno(nr_iomaps);
++      nfserr = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
++                      lcp->lc_up_len, &iomaps, &nr_iomaps,
++                      i_blocksize(inode));
++      if (nfserr != nfs_ok)
++              return nfserr;
+ 
+       return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
+ }
+--- a/fs/nfsd/blocklayoutxdr.c
++++ b/fs/nfsd/blocklayoutxdr.c
+@@ -112,34 +112,54 @@ nfsd4_block_encode_getdeviceinfo(struct
+       return 0;
+ }
+ 
+-int
++/**
++ * nfsd4_block_decode_layoutupdate - decode the block layout extent array
++ * @p: pointer to the xdr data
++ * @len: number of bytes to decode
++ * @iomapp: pointer to store the decoded extent array
++ * @nr_iomapsp: pointer to store the number of extents
++ * @block_size: alignment of extent offset and length
++ *
++ * This function decodes the opaque field of the layoutupdate4 structure
++ * in a layoutcommit request for the block layout driver. The field is
++ * actually an array of extents sent by the client. It also checks that
++ * the file offset, storage offset and length of each extent are aligned
++ * by @block_size.
++ *
++ * Return values:
++ *   %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
++ *   %nfserr_bad_xdr: The encoded array in @p is invalid
++ *   %nfserr_inval: An unaligned extent found
++ *   %nfserr_delay: Failed to allocate memory for @iomapp
++ */
++__be32
+ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+-              u32 block_size)
++              int *nr_iomapsp, u32 block_size)
+ {
+       struct iomap *iomaps;
+       u32 nr_iomaps, i;
+ 
+       if (len < sizeof(u32)) {
+               dprintk("%s: extent array too small: %u\n", __func__, len);
+-              return -EINVAL;
++              return nfserr_bad_xdr;
+       }
+       len -= sizeof(u32);
+       if (len % PNFS_BLOCK_EXTENT_SIZE) {
+               dprintk("%s: extent array invalid: %u\n", __func__, len);
+-              return -EINVAL;
++              return nfserr_bad_xdr;
+       }
+ 
+       nr_iomaps = be32_to_cpup(p++);
+       if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) {
+               dprintk("%s: extent array size mismatch: %u/%u\n",
+                       __func__, len, nr_iomaps);
+-              return -EINVAL;
++              return nfserr_bad_xdr;
+       }
+ 
+       iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
+       if (!iomaps) {
+               dprintk("%s: failed to allocate extent array\n", __func__);
+-              return -ENOMEM;
++              return nfserr_delay;
+       }
+ 
+       for (i = 0; i < nr_iomaps; i++) {
+@@ -178,22 +198,42 @@ nfsd4_block_decode_layoutupdate(__be32 *
+       }
+ 
+       *iomapp = iomaps;
+-      return nr_iomaps;
++      *nr_iomapsp = nr_iomaps;
++      return nfs_ok;
+ fail:
+       kfree(iomaps);
+-      return -EINVAL;
++      return nfserr_inval;
+ }
+ 
+-int
++/**
++ * nfsd4_scsi_decode_layoutupdate - decode the scsi layout extent array
++ * @p: pointer to the xdr data
++ * @len: number of bytes to decode
++ * @iomapp: pointer to store the decoded extent array
++ * @nr_iomapsp: pointer to store the number of extents
++ * @block_size: alignment of extent offset and length
++ *
++ * This function decodes the opaque field of the layoutupdate4 structure
++ * in a layoutcommit request for the scsi layout driver. The field is
++ * actually an array of extents sent by the client. It also checks that
++ * the offset and length of each extent are aligned by @block_size.
++ *
++ * Return values:
++ *   %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
++ *   %nfserr_bad_xdr: The encoded array in @p is invalid
++ *   %nfserr_inval: An unaligned extent found
++ *   %nfserr_delay: Failed to allocate memory for @iomapp
++ */
++__be32
+ nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+-              u32 block_size)
++              int *nr_iomapsp, u32 block_size)
+ {
+       struct iomap *iomaps;
+       u32 nr_iomaps, expected, i;
+ 
+       if (len < sizeof(u32)) {
+               dprintk("%s: extent array too small: %u\n", __func__, len);
+-              return -EINVAL;
++              return nfserr_bad_xdr;
+       }
+ 
+       nr_iomaps = be32_to_cpup(p++);
+@@ -201,13 +241,13 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p
+       if (len != expected) {
+               dprintk("%s: extent array size mismatch: %u/%u\n",
+                       __func__, len, expected);
+-              return -EINVAL;
++              return nfserr_bad_xdr;
+       }
+ 
+       iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
+       if (!iomaps) {
+               dprintk("%s: failed to allocate extent array\n", __func__);
+-              return -ENOMEM;
++              return nfserr_delay;
+       }
+ 
+       for (i = 0; i < nr_iomaps; i++) {
+@@ -229,8 +269,9 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p
+       }
+ 
+       *iomapp = iomaps;
+-      return nr_iomaps;
++      *nr_iomapsp = nr_iomaps;
++      return nfs_ok;
+ fail:
+       kfree(iomaps);
+-      return -EINVAL;
++      return nfserr_inval;
+ }
+--- a/fs/nfsd/blocklayoutxdr.h
++++ b/fs/nfsd/blocklayoutxdr.h
+@@ -54,9 +54,9 @@ __be32 nfsd4_block_encode_getdeviceinfo(
+               const struct nfsd4_getdeviceinfo *gdp);
+ __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
+               const struct nfsd4_layoutget *lgp);
+-int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+-              u32 block_size);
+-int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+-              u32 block_size);
++__be32 nfsd4_block_decode_layoutupdate(__be32 *p, u32 len,
++              struct iomap **iomapp, int *nr_iomapsp, u32 block_size);
++__be32 nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len,
++              struct iomap **iomapp, int *nr_iomapsp, u32 block_size);
+ 
+ #endif /* _NFSD_BLOCKLAYOUTXDR_H */
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -286,6 +286,7 @@ void               nfsd_lockd_shutdown(void);
+ #define       nfserr_cb_path_down     cpu_to_be32(NFSERR_CB_PATH_DOWN)
+ #define       nfserr_locked           cpu_to_be32(NFSERR_LOCKED)
+ #define       nfserr_wrongsec         cpu_to_be32(NFSERR_WRONGSEC)
++#define nfserr_delay                  cpu_to_be32(NFS4ERR_DELAY)
+ #define nfserr_badiomode              cpu_to_be32(NFS4ERR_BADIOMODE)
+ #define nfserr_badlayout              cpu_to_be32(NFS4ERR_BADLAYOUT)
+ #define nfserr_bad_session_digest     cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST)
diff --git a/queue-6.12/padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch b/queue-6.12/padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch

new file mode 100644 (file)

index 0000000..6531d76
--- /dev/null
+++ b/queue-6.12/padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch
@@ -0,0 +1,44 @@
+From stable+bounces-188140-greg=kroah.com@vger.kernel.org Mon Oct 20 17:37:12 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 11:37:02 -0400
+Subject: padata: Reset next CPU when reorder sequence wraps around
+To: stable@vger.kernel.org
+Cc: Xiao Liang <shaw.leon@gmail.com>, Herbert Xu <herbert@gondor.apana.org.au>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020153702.1820394-1-sashal@kernel.org>
+
+From: Xiao Liang <shaw.leon@gmail.com>
+
+[ Upstream commit 501302d5cee0d8e8ec2c4a5919c37e0df9abc99b ]
+
+When seq_nr wraps around, the next reorder job with seq 0 is hashed to
+the first CPU in padata_do_serial(). Correspondingly, pd->cpu must be
+reset to the first CPU when pd->processed wraps around. Otherwise, if the
+number of used CPUs is not a power of 2, padata_find_next() will check
+the wrong list, resulting in a deadlock.
+
+Fixes: 6fc4dbcf0276 ("padata: Replace delayed timer with immediate workqueue in padata_reorder")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Xiao Liang <shaw.leon@gmail.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+[ relocated fix to padata_find_next() using pd->processed and pd->cpu structure fields ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/padata.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -290,7 +290,11 @@ static struct padata_priv *padata_find_n
+       if (remove_object) {
+               list_del_init(&padata->list);
+               ++pd->processed;
+-              pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);
++              /* When sequence wraps around, reset to the first CPU. */
++              if (unlikely(pd->processed == 0))
++                      pd->cpu = cpumask_first(pd->cpumask.pcpu);
++              else
++                      pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);
+       }
+ 
+       spin_unlock(&reorder->lock);
diff --git a/queue-6.12/pci-add-pci_vdevice_sub-helper-macro.patch b/queue-6.12/pci-add-pci_vdevice_sub-helper-macro.patch

new file mode 100644 (file)

index 0000000..6baf78a
--- /dev/null
+++ b/queue-6.12/pci-add-pci_vdevice_sub-helper-macro.patch
@@ -0,0 +1,51 @@
+From stable+bounces-188207-greg=kroah.com@vger.kernel.org Mon Oct 20 19:28:50 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 13:28:38 -0400
+Subject: PCI: Add PCI_VDEVICE_SUB helper macro
+To: stable@vger.kernel.org
+Cc: Piotr Kwapulinski <piotr.kwapulinski@intel.com>, Przemek Kitszel <przemyslaw.kitszel@intel.com>, Bjorn Helgaas <bhelgaas@google.com>, Rafal Romanowski <rafal.romanowski@intel.com>, Tony Nguyen <anthony.l.nguyen@intel.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020172841.1850940-1-sashal@kernel.org>
+
+From: Piotr Kwapulinski <piotr.kwapulinski@intel.com>
+
+[ Upstream commit 208fff3f567e2a3c3e7e4788845e90245c3891b4 ]
+
+PCI_VDEVICE_SUB generates the pci_device_id struct layout for
+the specific PCI device/subdevice. Private data may follow the
+output.
+
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Signed-off-by: Piotr Kwapulinski <piotr.kwapulinski@intel.com>
+Acked-by: Bjorn Helgaas <bhelgaas@google.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Stable-dep-of: a7075f501bd3 ("ixgbevf: fix mailbox API compatibility by negotiating supported features")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/pci.h |   14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/include/linux/pci.h
++++ b/include/linux/pci.h
+@@ -1067,6 +1067,20 @@ struct pci_driver {
+       .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0
+ 
+ /**
++ * PCI_VDEVICE_SUB - describe a specific PCI device/subdevice in a short form
++ * @vend: the vendor name
++ * @dev: the 16 bit PCI Device ID
++ * @subvend: the 16 bit PCI Subvendor ID
++ * @subdev: the 16 bit PCI Subdevice ID
++ *
++ * Generate the pci_device_id struct layout for the specific PCI
++ * device/subdevice. Private data may follow the output.
++ */
++#define PCI_VDEVICE_SUB(vend, dev, subvend, subdev) \
++      .vendor = PCI_VENDOR_ID_##vend, .device = (dev), \
++      .subvendor = (subvend), .subdevice = (subdev), 0, 0
++
++/**
+  * PCI_DEVICE_DATA - macro used to describe a specific PCI device in very short form
+  * @vend: the vendor name (without PCI_VENDOR_ID_ prefix)
+  * @dev: the device name (without PCI_DEVICE_ID_<vend>_ prefix)
diff --git a/queue-6.12/phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch b/queue-6.12/phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch

new file mode 100644 (file)

index 0000000..88a16a4
--- /dev/null
+++ b/queue-6.12/phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch
@@ -0,0 +1,265 @@
+From stable+bounces-188089-greg=kroah.com@vger.kernel.org Mon Oct 20 15:02:19 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 08:59:06 -0400
+Subject: phy: cadence: cdns-dphy: Fix PLL lock and O_CMN_READY polling
+To: stable@vger.kernel.org
+Cc: Devarsh Thakkar <devarsht@ti.com>, Harikrishna Shenoy <h-shenoy@ti.com>, Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>, Vinod Koul <vkoul@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020125906.1762647-2-sashal@kernel.org>
+
+From: Devarsh Thakkar <devarsht@ti.com>
+
+[ Upstream commit 284fb19a3ffb1083c3ad9c00d29749d09dddb99c ]
+
+PLL lock and O_CMN_READY assertion can only happen after the common state
+machine is enabled by programming the DPHY_CMN_SSM register, but the driver
+was polling for them before the common state machine was enabled, which is
+incorrect.  This follows the DPHY initialization sequence described in the
+J721E TRM [1], section "12.7.2.4.1.2.1 Start-up Sequence Timing Diagram".
+It shows O_CMN_READY polling at the end, after the common configuration pin
+setup, where the common configuration pin setup step enables the state
+machine, as referenced in "Table 12-1533. Common Configuration-Related
+Setup", which mentions the state machine.
+
+To fix this:
+- Add new function callbacks for polling on PLL lock and O_CMN_READY
+  assertion.
+- As the state machine and clocks get enabled in the power_on callback
+  only, move the clock related programming from the configure callback to
+  the power_on callback and poll for PLL lock and O_CMN_READY assertion
+  after the state machine gets enabled.
+- The configure callback only saves the PLL configuration received from the
+  client driver which will be applied later on in power_on callback.
+- Add checks to ensure configure is called before power_on and state
+  machine is in disabled state before power_on callback is called.
+- Disable state machine in power_off so that client driver can re-configure
+  the PLL by following up a power_off, configure, power_on sequence.
+
+[1]: https://www.ti.com/lit/zip/spruil1
+
+Cc: stable@vger.kernel.org
+Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support")
+Signed-off-by: Devarsh Thakkar <devarsht@ti.com>
+Tested-by: Harikrishna Shenoy <h-shenoy@ti.com>
+Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
+Link: https://lore.kernel.org/r/20250704125915.1224738-2-devarsht@ti.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/phy/cadence/cdns-dphy.c |  124 +++++++++++++++++++++++++++++-----------
+ 1 file changed, 92 insertions(+), 32 deletions(-)
+
+--- a/drivers/phy/cadence/cdns-dphy.c
++++ b/drivers/phy/cadence/cdns-dphy.c
+@@ -100,6 +100,8 @@ struct cdns_dphy_ops {
+       void (*set_pll_cfg)(struct cdns_dphy *dphy,
+                           const struct cdns_dphy_cfg *cfg);
+       unsigned long (*get_wakeup_time_ns)(struct cdns_dphy *dphy);
++      int (*wait_for_pll_lock)(struct cdns_dphy *dphy);
++      int (*wait_for_cmn_ready)(struct cdns_dphy *dphy);
+ };
+ 
+ struct cdns_dphy {
+@@ -109,6 +111,8 @@ struct cdns_dphy {
+       struct clk *pll_ref_clk;
+       const struct cdns_dphy_ops *ops;
+       struct phy *phy;
++      bool is_configured;
++      bool is_powered;
+ };
+ 
+ /* Order of bands is important since the index is the band number. */
+@@ -195,6 +199,16 @@ static unsigned long cdns_dphy_get_wakeu
+       return dphy->ops->get_wakeup_time_ns(dphy);
+ }
+ 
++static int cdns_dphy_wait_for_pll_lock(struct cdns_dphy *dphy)
++{
++      return dphy->ops->wait_for_pll_lock ? dphy->ops->wait_for_pll_lock(dphy) : 0;
++}
++
++static int cdns_dphy_wait_for_cmn_ready(struct cdns_dphy *dphy)
++{
++      return  dphy->ops->wait_for_cmn_ready ? dphy->ops->wait_for_cmn_ready(dphy) : 0;
++}
++
+ static unsigned long cdns_dphy_ref_get_wakeup_time_ns(struct cdns_dphy *dphy)
+ {
+       /* Default wakeup time is 800 ns (in a simulated environment). */
+@@ -236,7 +250,6 @@ static unsigned long cdns_dphy_j721e_get
+ static void cdns_dphy_j721e_set_pll_cfg(struct cdns_dphy *dphy,
+                                       const struct cdns_dphy_cfg *cfg)
+ {
+-      u32 status;
+ 
+       /*
+        * set the PWM and PLL Byteclk divider settings to recommended values
+@@ -253,13 +266,6 @@ static void cdns_dphy_j721e_set_pll_cfg(
+ 
+       writel(DPHY_TX_J721E_WIZ_LANE_RSTB,
+              dphy->regs + DPHY_TX_J721E_WIZ_RST_CTRL);
+-
+-      readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status,
+-                         (status & DPHY_TX_WIZ_PLL_LOCK), 0, POLL_TIMEOUT_US);
+-
+-      readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status,
+-                         (status & DPHY_TX_WIZ_O_CMN_READY), 0,
+-                         POLL_TIMEOUT_US);
+ }
+ 
+ static void cdns_dphy_j721e_set_psm_div(struct cdns_dphy *dphy, u8 div)
+@@ -267,6 +273,23 @@ static void cdns_dphy_j721e_set_psm_div(
+       writel(div, dphy->regs + DPHY_TX_J721E_WIZ_PSM_FREQ);
+ }
+ 
++static int cdns_dphy_j721e_wait_for_pll_lock(struct cdns_dphy *dphy)
++{
++      u32 status;
++
++      return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status,
++                             status & DPHY_TX_WIZ_PLL_LOCK, 0, POLL_TIMEOUT_US);
++}
++
++static int cdns_dphy_j721e_wait_for_cmn_ready(struct cdns_dphy *dphy)
++{
++      u32 status;
++
++      return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status,
++                             status & DPHY_TX_WIZ_O_CMN_READY, 0,
++                             POLL_TIMEOUT_US);
++}
++
+ /*
+  * This is the reference implementation of DPHY hooks. Specific integration of
+  * this IP may have to re-implement some of them depending on how they decided
+@@ -282,6 +305,8 @@ static const struct cdns_dphy_ops j721e_
+       .get_wakeup_time_ns = cdns_dphy_j721e_get_wakeup_time_ns,
+       .set_pll_cfg = cdns_dphy_j721e_set_pll_cfg,
+       .set_psm_div = cdns_dphy_j721e_set_psm_div,
++      .wait_for_pll_lock = cdns_dphy_j721e_wait_for_pll_lock,
++      .wait_for_cmn_ready = cdns_dphy_j721e_wait_for_cmn_ready,
+ };
+ 
+ static int cdns_dphy_config_from_opts(struct phy *phy,
+@@ -339,21 +364,36 @@ static int cdns_dphy_validate(struct phy
+ static int cdns_dphy_configure(struct phy *phy, union phy_configure_opts *opts)
+ {
+       struct cdns_dphy *dphy = phy_get_drvdata(phy);
+-      struct cdns_dphy_cfg cfg = { 0 };
+-      int ret, band_ctrl;
+-      unsigned int reg;
++      int ret;
+ 
+-      ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &cfg);
+-      if (ret)
+-              return ret;
++      ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &dphy->cfg);
++      if (!ret)
++              dphy->is_configured = true;
++
++      return ret;
++}
++
++static int cdns_dphy_power_on(struct phy *phy)
++{
++      struct cdns_dphy *dphy = phy_get_drvdata(phy);
++      int ret;
++      u32 reg;
++
++      if (!dphy->is_configured || dphy->is_powered)
++              return -EINVAL;
++
++      clk_prepare_enable(dphy->psm_clk);
++      clk_prepare_enable(dphy->pll_ref_clk);
+ 
+       /*
+        * Configure the internal PSM clk divider so that the DPHY has a
+        * 1MHz clk (or something close).
+        */
+       ret = cdns_dphy_setup_psm(dphy);
+-      if (ret)
+-              return ret;
++      if (ret) {
++              dev_err(&dphy->phy->dev, "Failed to setup PSM with error %d\n", ret);
++              goto err_power_on;
++      }
+ 
+       /*
+        * Configure attach clk lanes to data lanes: the DPHY has 2 clk lanes
+@@ -368,40 +408,60 @@ static int cdns_dphy_configure(struct ph
+        * Configure the DPHY PLL that will be used to generate the TX byte
+        * clk.
+        */
+-      cdns_dphy_set_pll_cfg(dphy, &cfg);
++      cdns_dphy_set_pll_cfg(dphy, &dphy->cfg);
+ 
+-      band_ctrl = cdns_dphy_tx_get_band_ctrl(opts->mipi_dphy.hs_clk_rate);
+-      if (band_ctrl < 0)
+-              return band_ctrl;
++      ret = cdns_dphy_tx_get_band_ctrl(dphy->cfg.hs_clk_rate);
++      if (ret < 0) {
++              dev_err(&dphy->phy->dev, "Failed to get band control value with error %d\n", ret);
++              goto err_power_on;
++      }
+ 
+-      reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, band_ctrl) |
+-            FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, band_ctrl);
++      reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, ret) |
++            FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, ret);
+       writel(reg, dphy->regs + DPHY_BAND_CFG);
+ 
+-      return 0;
+-}
+-
+-static int cdns_dphy_power_on(struct phy *phy)
+-{
+-      struct cdns_dphy *dphy = phy_get_drvdata(phy);
+-
+-      clk_prepare_enable(dphy->psm_clk);
+-      clk_prepare_enable(dphy->pll_ref_clk);
+-
+       /* Start TX state machine. */
+       writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN,
+              dphy->regs + DPHY_CMN_SSM);
+ 
++      ret = cdns_dphy_wait_for_pll_lock(dphy);
++      if (ret) {
++              dev_err(&dphy->phy->dev, "Failed to lock PLL with error %d\n", ret);
++              goto err_power_on;
++      }
++
++      ret = cdns_dphy_wait_for_cmn_ready(dphy);
++      if (ret) {
++              dev_err(&dphy->phy->dev, "O_CMN_READY signal failed to assert with error %d\n",
++                      ret);
++              goto err_power_on;
++      }
++
++      dphy->is_powered = true;
++
+       return 0;
++
++err_power_on:
++      clk_disable_unprepare(dphy->pll_ref_clk);
++      clk_disable_unprepare(dphy->psm_clk);
++
++      return ret;
+ }
+ 
+ static int cdns_dphy_power_off(struct phy *phy)
+ {
+       struct cdns_dphy *dphy = phy_get_drvdata(phy);
++      u32 reg;
+ 
+       clk_disable_unprepare(dphy->pll_ref_clk);
+       clk_disable_unprepare(dphy->psm_clk);
+ 
++      /* Stop TX state machine. */
++      reg = readl(dphy->regs + DPHY_CMN_SSM);
++      writel(reg & ~DPHY_CMN_SSM_EN, dphy->regs + DPHY_CMN_SSM);
++
++      dphy->is_powered = false;
++
+       return 0;
+ }
+ 
diff --git a/queue-6.12/phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch b/queue-6.12/phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch

new file mode 100644 (file)

index 0000000..2193431
--- /dev/null
+++ b/queue-6.12/phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch
@@ -0,0 +1,58 @@
+From stable+bounces-188377-greg=kroah.com@vger.kernel.org Tue Oct 21 18:44:00 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 Oct 2025 12:43:33 -0400
+Subject: phy: cadence: cdns-dphy: Update calibration wait time for startup state machine
+To: stable@vger.kernel.org
+Cc: Devarsh Thakkar <devarsht@ti.com>, Harikrishna Shenoy <h-shenoy@ti.com>, Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>, Vinod Koul <vkoul@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251021164333.2380694-3-sashal@kernel.org>
+
+From: Devarsh Thakkar <devarsht@ti.com>
+
+[ Upstream commit 2c27aaee934a1b5229152fe33a14f1fdf50da143 ]
+
+Do a read-modify-write so that we re-use the characterized reset value
+specified in the TRM [1] for the calibration wait time, which defines the
+number of cycles to wait after the startup state machine enters the
+bandgap enable state.
+
+This fixes the PLL lock timeout error seen while using the RPi DSI panel
+on TI's AM62L and J721E SoCs: previously the calibration wait time was
+being overwritten with zero, which prevented the PLL from locking and
+caused the timeout.
+
+[1] AM62P TRM (Section 14.8.6.3.2.1.1 DPHY_TX_DPHYTX_CMN0_CMN_DIG_TBIT2):
+Link: https://www.ti.com/lit/pdf/spruj83
+
+Cc: stable@vger.kernel.org
+Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support")
+Signed-off-by: Devarsh Thakkar <devarsht@ti.com>
+Tested-by: Harikrishna Shenoy <h-shenoy@ti.com>
+Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
+Link: https://lore.kernel.org/r/20250704125915.1224738-3-devarsht@ti.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/phy/cadence/cdns-dphy.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/phy/cadence/cdns-dphy.c
++++ b/drivers/phy/cadence/cdns-dphy.c
+@@ -30,6 +30,7 @@
+ 
+ #define DPHY_CMN_SSM                  DPHY_PMA_CMN(0x20)
+ #define DPHY_CMN_SSM_EN                       BIT(0)
++#define DPHY_CMN_SSM_CAL_WAIT_TIME    GENMASK(8, 1)
+ #define DPHY_CMN_TX_MODE_EN           BIT(9)
+ 
+ #define DPHY_CMN_PWM                  DPHY_PMA_CMN(0x40)
+@@ -421,7 +422,8 @@ static int cdns_dphy_power_on(struct phy
+       writel(reg, dphy->regs + DPHY_BAND_CFG);
+ 
+       /* Start TX state machine. */
+-      writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN,
++      reg = readl(dphy->regs + DPHY_CMN_SSM);
++      writel((reg & DPHY_CMN_SSM_CAL_WAIT_TIME) | DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN,
+              dphy->regs + DPHY_CMN_SSM);
+ 
+       ret = cdns_dphy_wait_for_pll_lock(dphy);
diff --git a/queue-6.12/phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch b/queue-6.12/phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch

new file mode 100644 (file)

index 0000000..66cca43
--- /dev/null
+++ b/queue-6.12/phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch
@@ -0,0 +1,59 @@
+From stable+bounces-188088-greg=kroah.com@vger.kernel.org Mon Oct 20 15:04:37 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 08:59:05 -0400
+Subject: phy: cdns-dphy: Store hs_clk_rate and return it
+To: stable@vger.kernel.org
+Cc: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>, Aradhya Bhatia <aradhya.bhatia@linux.dev>, Parth Pancholi <parth.pancholi@toradex.com>, Jayesh Choudhary <j-choudhary@ti.com>, Vinod Koul <vkoul@kernel.org>, Devarsh Thakkar <devarsht@ti.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020125906.1762647-1-sashal@kernel.org>
+
+From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
+
+[ Upstream commit 689a54acb56858c85de8c7285db82b8ae6dbf683 ]
+
+The DPHY driver does not return the actual hs_clk_rate, so the DSI
+driver has no idea what clock was actually achieved. Set the realized
+hs_clk_rate to the opts struct, so that the DSI driver gets it back.
+
+Reviewed-by: Aradhya Bhatia <aradhya.bhatia@linux.dev>
+Tested-by: Parth Pancholi <parth.pancholi@toradex.com>
+Tested-by: Jayesh Choudhary <j-choudhary@ti.com>
+Acked-by: Vinod Koul <vkoul@kernel.org>
+Reviewed-by: Devarsh Thakkar <devarsht@ti.com>
+Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
+Link: https://lore.kernel.org/r/20250723-cdns-dphy-hs-clk-rate-fix-v1-1-d4539d44cbe7@ideasonboard.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Stable-dep-of: 284fb19a3ffb ("phy: cadence: cdns-dphy: Fix PLL lock and O_CMN_READY polling")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/phy/cadence/cdns-dphy.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/phy/cadence/cdns-dphy.c
++++ b/drivers/phy/cadence/cdns-dphy.c
+@@ -79,6 +79,7 @@ struct cdns_dphy_cfg {
+       u8 pll_ipdiv;
+       u8 pll_opdiv;
+       u16 pll_fbdiv;
++      u32 hs_clk_rate;
+       unsigned int nlanes;
+ };
+ 
+@@ -154,6 +155,9 @@ static int cdns_dsi_get_dphy_pll_cfg(str
+                                         cfg->pll_ipdiv,
+                                         pll_ref_hz);
+ 
++      cfg->hs_clk_rate = div_u64((u64)pll_ref_hz * cfg->pll_fbdiv,
++                                 2 * cfg->pll_opdiv * cfg->pll_ipdiv);
++
+       return 0;
+ }
+ 
+@@ -297,6 +301,7 @@ static int cdns_dphy_config_from_opts(st
+       if (ret)
+               return ret;
+ 
++      opts->hs_clk_rate = cfg->hs_clk_rate;
+       opts->wakeup = cdns_dphy_get_wakeup_time_ns(dphy) / 1000;
+ 
+       return 0;
diff --git a/queue-6.12/pm-runtime-add-new-devm-functions.patch b/queue-6.12/pm-runtime-add-new-devm-functions.patch

new file mode 100644 (file)

index 0000000..6c6b121
--- /dev/null
+++ b/queue-6.12/pm-runtime-add-new-devm-functions.patch
@@ -0,0 +1,109 @@
+From stable+bounces-188094-greg=kroah.com@vger.kernel.org Mon Oct 20 15:02:47 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 09:02:38 -0400
+Subject: PM: runtime: Add new devm functions
+To: stable@vger.kernel.org
+Cc: "Bence Csókás" <csokas.bence@prolan.hu>, "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>, "Sasha Levin" <sashal@kernel.org>
+Message-ID: <20251020130239.1763909-1-sashal@kernel.org>
+
+From: Bence Csókás <csokas.bence@prolan.hu>
+
+[ Upstream commit 73db799bf5efc5a04654bb3ff6c9bf63a0dfa473 ]
+
+Add `devm_pm_runtime_set_active_enabled()` and
+`devm_pm_runtime_get_noresume()` for simplifying
+common cases in drivers.
+
+Signed-off-by: Bence Csókás <csokas.bence@prolan.hu>
+Link: https://patch.msgid.link/20250327195928.680771-3-csokas.bence@prolan.hu
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Stable-dep-of: 0792c1984a45 ("iio: imu: inv_icm42600: Simplify pm_runtime setup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/base/power/runtime.c |   44 +++++++++++++++++++++++++++++++++++++++++++
+ include/linux/pm_runtime.h   |    4 +++
+ 2 files changed, 48 insertions(+)
+
+--- a/drivers/base/power/runtime.c
++++ b/drivers/base/power/runtime.c
+@@ -1554,6 +1554,32 @@ out:
+ }
+ EXPORT_SYMBOL_GPL(pm_runtime_enable);
+ 
++static void pm_runtime_set_suspended_action(void *data)
++{
++      pm_runtime_set_suspended(data);
++}
++
++/**
++ * devm_pm_runtime_set_active_enabled - set_active version of devm_pm_runtime_enable.
++ *
++ * @dev: Device to handle.
++ */
++int devm_pm_runtime_set_active_enabled(struct device *dev)
++{
++      int err;
++
++      err = pm_runtime_set_active(dev);
++      if (err)
++              return err;
++
++      err = devm_add_action_or_reset(dev, pm_runtime_set_suspended_action, dev);
++      if (err)
++              return err;
++
++      return devm_pm_runtime_enable(dev);
++}
++EXPORT_SYMBOL_GPL(devm_pm_runtime_set_active_enabled);
++
+ static void pm_runtime_disable_action(void *data)
+ {
+       pm_runtime_dont_use_autosuspend(data);
+@@ -1576,6 +1602,24 @@ int devm_pm_runtime_enable(struct device
+ }
+ EXPORT_SYMBOL_GPL(devm_pm_runtime_enable);
+ 
++static void pm_runtime_put_noidle_action(void *data)
++{
++      pm_runtime_put_noidle(data);
++}
++
++/**
++ * devm_pm_runtime_get_noresume - devres-enabled version of pm_runtime_get_noresume.
++ *
++ * @dev: Device to handle.
++ */
++int devm_pm_runtime_get_noresume(struct device *dev)
++{
++      pm_runtime_get_noresume(dev);
++
++      return devm_add_action_or_reset(dev, pm_runtime_put_noidle_action, dev);
++}
++EXPORT_SYMBOL_GPL(devm_pm_runtime_get_noresume);
++
+ /**
+  * pm_runtime_forbid - Block runtime PM of a device.
+  * @dev: Device to handle.
+--- a/include/linux/pm_runtime.h
++++ b/include/linux/pm_runtime.h
+@@ -94,7 +94,9 @@ extern void pm_runtime_new_link(struct d
+ extern void pm_runtime_drop_link(struct device_link *link);
+ extern void pm_runtime_release_supplier(struct device_link *link);
+ 
++int devm_pm_runtime_set_active_enabled(struct device *dev);
+ extern int devm_pm_runtime_enable(struct device *dev);
++int devm_pm_runtime_get_noresume(struct device *dev);
+ 
+ /**
+  * pm_suspend_ignore_children - Set runtime PM behavior regarding children.
+@@ -278,7 +280,9 @@ static inline void __pm_runtime_disable(
+ static inline void pm_runtime_allow(struct device *dev) {}
+ static inline void pm_runtime_forbid(struct device *dev) {}
+ 
++static inline int devm_pm_runtime_set_active_enabled(struct device *dev) { return 0; }
+ static inline int devm_pm_runtime_enable(struct device *dev) { return 0; }
++static inline int devm_pm_runtime_get_noresume(struct device *dev) { return 0; }
+ 
+ static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {}
+ static inline void pm_runtime_get_noresume(struct device *dev) {}
diff --git a/queue-6.12/series b/queue-6.12/series

index 925cdd006bea2aa37c88686cd205aa62423e2edc..9621ee93343e78eabc8829b4a16126d9271441cb 100644 (file)
--- a/queue-6.12/series
+++ b/queue-6.12/series
@@ -91,3 +91,42 @@ hid-multitouch-fix-name-of-stylus-input-devices.patch
  asoc-amd-sdw_utils-avoid-null-deref-when-devm_kaspri.patch
  selftests-arg_parsing-ensure-data-is-flushed-to-disk.patch
  nvme-tcp-handle-tls-partially-sent-records-in-write_.patch
+hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch
+xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch
+xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch
+phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch
+phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch
+pm-runtime-add-new-devm-functions.patch
+iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch
+iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch
+nfsd-use-correct-error-code-when-decoding-extents.patch
+nfsd-drop-dprintk-in-blocklayout-xdr-functions.patch
+nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch
+nfsd-minor-cleanup-in-layoutcommit-processing.patch
+nfsd-implement-large-extent-array-support-in-pnfs.patch
+nfsd-fix-last-write-offset-handling-in-layoutcommit.patch
+wifi-rtw89-avoid-possible-tx-wait-initialization-race.patch
+xfs-use-deferred-intent-items-for-reaping-crosslinked-blocks.patch
+padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch
+md-raid0-handle-bio_split-errors.patch
+md-raid1-handle-bio_split-errors.patch
+md-raid10-handle-bio_split-errors.patch
+md-fix-mssing-blktrace-bio-split-events.patch
+x86-resctrl-refactor-resctrl_arch_rmid_read.patch
+x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch
+d_alloc_parallel-set-dcache_par_lookup-earlier.patch
+vfs-don-t-leak-disconnected-dentries-on-umount.patch
+pci-add-pci_vdevice_sub-helper-macro.patch
+ixgbevf-add-support-for-intel-r-e610-device.patch
+ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch
+ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch
+tcp-convert-to-dev_net_rcu.patch
+tcp-cache-rtax_quickack-metric-in-a-hot-cache-line.patch
+net-dst-add-four-helpers-to-annotate-data-races-around-dst-dev.patch
+ipv4-adopt-dst_dev-skb_dst_dev-and-skb_dst_dev_net.patch
+net-add-locking-to-protect-skb-dev-access-in-ip_output.patch
+mptcp-call-dst_release-in-mptcp_active_enable.patch
+mptcp-use-__sk_dst_get-and-dst_dev_rcu-in-mptcp_active_enable.patch
+mptcp-reset-blackhole-on-success-with-non-loopback-ifaces.patch
+phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch
+nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch
diff --git a/queue-6.12/tcp-cache-rtax_quickack-metric-in-a-hot-cache-line.patch b/queue-6.12/tcp-cache-rtax_quickack-metric-in-a-hot-cache-line.patch

new file mode 100644 (file)

index 0000000..0881fd9
--- /dev/null
+++ b/queue-6.12/tcp-cache-rtax_quickack-metric-in-a-hot-cache-line.patch
@@ -0,0 +1,81 @@
+From stable+bounces-188148-greg=kroah.com@vger.kernel.org Mon Oct 20 17:45:17 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 11:44:03 -0400
+Subject: tcp: cache RTAX_QUICKACK metric in a hot cache line
+To: stable@vger.kernel.org
+Cc: Eric Dumazet <edumazet@google.com>, Jason Xing <kerneljasonxing@gmail.com>, Neal Cardwell <ncardwell@google.com>, Kuniyuki Iwashima <kuniyu@amazon.com>, Paolo Abeni <pabeni@redhat.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020154409.1823664-2-sashal@kernel.org>
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 15492700ac41459b54a6683490adcee350ab11e3 ]
+
+tcp_in_quickack_mode() is called from input path for small packets.
+
+It calls __sk_dst_get() which reads sk->sk_dst_cache which has been
+put in sock_read_tx group (for good reasons).
+
+Then dst_metric(dst, RTAX_QUICKACK) also needs extra cache line misses.
+
+Cache RTAX_QUICKACK in icsk->icsk_ack.dst_quick_ack to no longer pull
+these cache lines for the cases a delayed ACK is scheduled.
+
+After this patch, the TCP receive path no longer accesses the
+sock_read_tx group.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
+Reviewed-by: Neal Cardwell <ncardwell@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250312083907.1931644-1-edumazet@google.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 833d4313bc1e ("mptcp: reset blackhole on success with non-loopback ifaces")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_connection_sock.h |    3 ++-
+ net/core/sock.c                    |    6 +++++-
+ net/ipv4/tcp_input.c               |    3 +--
+ 3 files changed, 8 insertions(+), 4 deletions(-)
+
+--- a/include/net/inet_connection_sock.h
++++ b/include/net/inet_connection_sock.h
+@@ -116,7 +116,8 @@ struct inet_connection_sock {
+               #define ATO_BITS 8
+               __u32             ato:ATO_BITS,  /* Predicted tick of soft clock           */
+                                 lrcv_flowlabel:20, /* last received ipv6 flowlabel       */
+-                                unused:4;
++                                dst_quick_ack:1, /* cache dst RTAX_QUICKACK              */
++                                unused:3;
+               unsigned long     timeout;       /* Currently scheduled timeout            */
+               __u32             lrcvtime;      /* timestamp of last received data packet */
+               __u16             last_seg_size; /* Size of last incoming segment          */
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -2547,8 +2547,12 @@ void sk_setup_caps(struct sock *sk, stru
+       u32 max_segs = 1;
+ 
+       sk->sk_route_caps = dst->dev->features;
+-      if (sk_is_tcp(sk))
++      if (sk_is_tcp(sk)) {
++              struct inet_connection_sock *icsk = inet_csk(sk);
++
+               sk->sk_route_caps |= NETIF_F_GSO;
++              icsk->icsk_ack.dst_quick_ack = dst_metric(dst, RTAX_QUICKACK);
++      }
+       if (sk->sk_route_caps & NETIF_F_GSO)
+               sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
+       if (unlikely(sk->sk_gso_disabled))
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -331,9 +331,8 @@ static void tcp_enter_quickack_mode(stru
+ static bool tcp_in_quickack_mode(struct sock *sk)
+ {
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+-      const struct dst_entry *dst = __sk_dst_get(sk);
+ 
+-      return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
++      return icsk->icsk_ack.dst_quick_ack ||
+               (icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
+ }
+ 
diff --git a/queue-6.12/tcp-convert-to-dev_net_rcu.patch b/queue-6.12/tcp-convert-to-dev_net_rcu.patch

new file mode 100644 (file)

index 0000000..a7d0a93
--- /dev/null
+++ b/queue-6.12/tcp-convert-to-dev_net_rcu.patch
@@ -0,0 +1,197 @@
+From stable+bounces-188147-greg=kroah.com@vger.kernel.org Mon Oct 20 17:44:21 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 11:44:02 -0400
+Subject: tcp: convert to dev_net_rcu()
+To: stable@vger.kernel.org
+Cc: Eric Dumazet <edumazet@google.com>, Kuniyuki Iwashima <kuniyu@amazon.com>, Jakub Kicinski <kuba@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020154409.1823664-1-sashal@kernel.org>
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e7b9ecce562ca6a1de32c56c597fa45e08c44ec0 ]
+
+TCP uses of dev_net() are under RCU protection, change them
+to dev_net_rcu() to get LOCKDEP support.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250301201424.2046477-4-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 833d4313bc1e ("mptcp: reset blackhole on success with non-loopback ifaces")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet6_hashtables.h |    2 +-
+ include/net/inet_hashtables.h  |    2 +-
+ net/ipv4/tcp_ipv4.c            |   12 ++++++------
+ net/ipv4/tcp_metrics.c         |    6 +++---
+ net/ipv6/tcp_ipv6.c            |   22 +++++++++++-----------
+ 5 files changed, 22 insertions(+), 22 deletions(-)
+
+--- a/include/net/inet6_hashtables.h
++++ b/include/net/inet6_hashtables.h
+@@ -150,7 +150,7 @@ static inline struct sock *__inet6_looku
+                                             int iif, int sdif,
+                                             bool *refcounted)
+ {
+-      struct net *net = dev_net(skb_dst(skb)->dev);
++      struct net *net = dev_net_rcu(skb_dst(skb)->dev);
+       const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+       struct sock *sk;
+ 
+--- a/include/net/inet_hashtables.h
++++ b/include/net/inet_hashtables.h
+@@ -492,7 +492,7 @@ static inline struct sock *__inet_lookup
+                                            const int sdif,
+                                            bool *refcounted)
+ {
+-      struct net *net = dev_net(skb_dst(skb)->dev);
++      struct net *net = dev_net_rcu(skb_dst(skb)->dev);
+       const struct iphdr *iph = ip_hdr(skb);
+       struct sock *sk;
+ 
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -494,14 +494,14 @@ int tcp_v4_err(struct sk_buff *skb, u32
+ {
+       const struct iphdr *iph = (const struct iphdr *)skb->data;
+       struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
+-      struct tcp_sock *tp;
++      struct net *net = dev_net_rcu(skb->dev);
+       const int type = icmp_hdr(skb)->type;
+       const int code = icmp_hdr(skb)->code;
+-      struct sock *sk;
+       struct request_sock *fastopen;
++      struct tcp_sock *tp;
+       u32 seq, snd_una;
++      struct sock *sk;
+       int err;
+-      struct net *net = dev_net(skb->dev);
+ 
+       sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+                                      iph->daddr, th->dest, iph->saddr,
+@@ -786,7 +786,7 @@ static void tcp_v4_send_reset(const stru
+       arg.iov[0].iov_base = (unsigned char *)&rep;
+       arg.iov[0].iov_len  = sizeof(rep.th);
+ 
+-      net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
++      net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev);
+ 
+       /* Invalid TCP option size or twice included auth */
+       if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, &aoh))
+@@ -1965,7 +1965,7 @@ EXPORT_SYMBOL(tcp_v4_do_rcv);
+ 
+ int tcp_v4_early_demux(struct sk_buff *skb)
+ {
+-      struct net *net = dev_net(skb->dev);
++      struct net *net = dev_net_rcu(skb->dev);
+       const struct iphdr *iph;
+       const struct tcphdr *th;
+       struct sock *sk;
+@@ -2176,7 +2176,7 @@ static void tcp_v4_fill_cb(struct sk_buf
+ 
+ int tcp_v4_rcv(struct sk_buff *skb)
+ {
+-      struct net *net = dev_net(skb->dev);
++      struct net *net = dev_net_rcu(skb->dev);
+       enum skb_drop_reason drop_reason;
+       int sdif = inet_sdif(skb);
+       int dif = inet_iif(skb);
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -170,7 +170,7 @@ static struct tcp_metrics_block *tcpm_ne
+       bool reclaim = false;
+ 
+       spin_lock_bh(&tcp_metrics_lock);
+-      net = dev_net(dst->dev);
++      net = dev_net_rcu(dst->dev);
+ 
+       /* While waiting for the spin-lock the cache might have been populated
+        * with this entry and so we have to check again.
+@@ -273,7 +273,7 @@ static struct tcp_metrics_block *__tcp_g
+               return NULL;
+       }
+ 
+-      net = dev_net(dst->dev);
++      net = dev_net_rcu(dst->dev);
+       hash ^= net_hash_mix(net);
+       hash = hash_32(hash, tcp_metrics_hash_log);
+ 
+@@ -318,7 +318,7 @@ static struct tcp_metrics_block *tcp_get
+       else
+               return NULL;
+ 
+-      net = dev_net(dst->dev);
++      net = dev_net_rcu(dst->dev);
+       hash ^= net_hash_mix(net);
+       hash = hash_32(hash, tcp_metrics_hash_log);
+ 
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -376,7 +376,7 @@ static int tcp_v6_err(struct sk_buff *sk
+ {
+       const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
+       const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
+-      struct net *net = dev_net(skb->dev);
++      struct net *net = dev_net_rcu(skb->dev);
+       struct request_sock *fastopen;
+       struct ipv6_pinfo *np;
+       struct tcp_sock *tp;
+@@ -864,16 +864,16 @@ static void tcp_v6_send_response(const s
+                                int oif, int rst, u8 tclass, __be32 label,
+                                u32 priority, u32 txhash, struct tcp_key *key)
+ {
+-      const struct tcphdr *th = tcp_hdr(skb);
+-      struct tcphdr *t1;
+-      struct sk_buff *buff;
+-      struct flowi6 fl6;
+-      struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
+-      struct sock *ctl_sk = net->ipv6.tcp_sk;
++      struct net *net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev);
+       unsigned int tot_len = sizeof(struct tcphdr);
++      struct sock *ctl_sk = net->ipv6.tcp_sk;
++      const struct tcphdr *th = tcp_hdr(skb);
+       __be32 mrst = 0, *topt;
+       struct dst_entry *dst;
+-      __u32 mark = 0;
++      struct sk_buff *buff;
++      struct tcphdr *t1;
++      struct flowi6 fl6;
++      u32 mark = 0;
+ 
+       if (tsecr)
+               tot_len += TCPOLEN_TSTAMP_ALIGNED;
+@@ -1036,7 +1036,7 @@ static void tcp_v6_send_reset(const stru
+       if (!sk && !ipv6_unicast_destination(skb))
+               return;
+ 
+-      net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
++      net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev);
+       /* Invalid TCP option size or twice included auth */
+       if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
+               return;
+@@ -1739,6 +1739,7 @@ static void tcp_v6_fill_cb(struct sk_buf
+ 
+ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
+ {
++      struct net *net = dev_net_rcu(skb->dev);
+       enum skb_drop_reason drop_reason;
+       int sdif = inet6_sdif(skb);
+       int dif = inet6_iif(skb);
+@@ -1748,7 +1749,6 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(s
+       bool refcounted;
+       int ret;
+       u32 isn;
+-      struct net *net = dev_net(skb->dev);
+ 
+       drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
+       if (skb->pkt_type != PACKET_HOST)
+@@ -1999,7 +1999,7 @@ do_time_wait:
+ 
+ void tcp_v6_early_demux(struct sk_buff *skb)
+ {
+-      struct net *net = dev_net(skb->dev);
++      struct net *net = dev_net_rcu(skb->dev);
+       const struct ipv6hdr *hdr;
+       const struct tcphdr *th;
+       struct sock *sk;
diff --git a/queue-6.12/vfs-don-t-leak-disconnected-dentries-on-umount.patch b/queue-6.12/vfs-don-t-leak-disconnected-dentries-on-umount.patch

new file mode 100644 (file)

index 0000000..cc2c984
--- /dev/null
+++ b/queue-6.12/vfs-don-t-leak-disconnected-dentries-on-umount.patch
@@ -0,0 +1,57 @@
+From stable+bounces-188212-greg=kroah.com@vger.kernel.org Mon Oct 20 19:29:10 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 13:28:59 -0400
+Subject: vfs: Don't leak disconnected dentries on umount
+To: stable@vger.kernel.org
+Cc: Jan Kara <jack@suse.cz>, syzbot+1d79ebe5383fc016cf07@syzkaller.appspotmail.com, Christian Brauner <brauner@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020172900.1851256-2-sashal@kernel.org>
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 56094ad3eaa21e6621396cc33811d8f72847a834 ]
+
+When user calls open_by_handle_at() on some inode that is not cached, we
+will create disconnected dentry for it. If such dentry is a directory,
+exportfs_decode_fh_raw() will then try to connect this dentry to the
+dentry tree through reconnect_path(). It may happen for various reasons
+(such as corrupted fs or race with rename) that the call to
+lookup_one_unlocked() in reconnect_one() will fail to find the dentry we
+are trying to reconnect and instead create a new dentry under the
+parent. Now this dentry will not be marked as disconnected although the
+parent still may well be disconnected (at least in case this
+inconsistency happened because the fs is corrupted and .. doesn't point
+to the real parent directory). This creates inconsistency in
+disconnected flags but AFAICS it was mostly harmless. At least until
+commit f1ee616214cb ("VFS: don't keep disconnected dentries on d_anon")
+which removed adding of most disconnected dentries to sb->s_anon list.
+Thus after this commit cleanup of disconnected dentries implicitly
+relies on the fact that dput() will immediately reclaim such dentries.
+However when some leaf dentry isn't marked as disconnected, as in the
+scenario described above, the reclaim doesn't happen and the dentries
+are "leaked". Memory reclaim can eventually reclaim them but otherwise
+they stay in memory and if umount comes first, we hit infamous "Busy
+inodes after unmount" bug. Make sure all dentries created under a
+disconnected parent are marked as disconnected as well.
+
+Reported-by: syzbot+1d79ebe5383fc016cf07@syzkaller.appspotmail.com
+Fixes: f1ee616214cb ("VFS: don't keep disconnected dentries on d_anon")
+CC: stable@vger.kernel.org
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/dcache.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -2486,6 +2486,8 @@ struct dentry *d_alloc_parallel(struct d
+       spin_lock(&parent->d_lock);
+       new->d_parent = dget_dlock(parent);
+       hlist_add_head(&new->d_sib, &parent->d_children);
++      if (parent->d_flags & DCACHE_DISCONNECTED)
++              new->d_flags |= DCACHE_DISCONNECTED;
+       spin_unlock(&parent->d_lock);
+ 
+ retry:
diff --git a/queue-6.12/wifi-rtw89-avoid-possible-tx-wait-initialization-race.patch b/queue-6.12/wifi-rtw89-avoid-possible-tx-wait-initialization-race.patch

new file mode 100644 (file)

index 0000000..9833645
--- /dev/null
+++ b/queue-6.12/wifi-rtw89-avoid-possible-tx-wait-initialization-race.patch
@@ -0,0 +1,230 @@
+From stable+bounces-188156-greg=kroah.com@vger.kernel.org Mon Oct 20 17:50:51 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 11:47:33 -0400
+Subject: wifi: rtw89: avoid possible TX wait initialization race
+To: stable@vger.kernel.org
+Cc: Fedor Pchelkin <pchelkin@ispras.ru>, Ping-Ke Shih <pkshih@realtek.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020154733.1824513-1-sashal@kernel.org>
+
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+
+[ Upstream commit c24248ed78f33ea299ea61d105355ba47157d49f ]
+
+The value of skb_data->wait indicates whether skb is passed on to the
+core mac80211 stack or released by the driver itself.  Make sure that by
+the time skb is added to txwd queue and becomes visible to the completing
+side, it has already allocated and initialized TX wait related data (in
+case it's needed).
+
+This is found by code review and addresses a possible race scenario
+described below:
+
+      Waiting thread                          Completing thread
+
+rtw89_core_send_nullfunc()
+  rtw89_core_tx_write_link()
+    ...
+    rtw89_pci_txwd_submit()
+      skb_data->wait = NULL
+      /* add skb to the queue */
+      skb_queue_tail(&txwd->queue, skb)
+
+  /* another thread (e.g. rtw89_ops_tx) performs TX kick off for the same queue */
+
+                                            rtw89_pci_napi_poll()
+                                            ...
+                                              rtw89_pci_release_txwd_skb()
+                                                /* get skb from the queue */
+                                                skb_unlink(skb, &txwd->queue)
+                                                rtw89_pci_tx_status()
+                                                  rtw89_core_tx_wait_complete()
+                                                  /* use incorrect skb_data->wait */
+  rtw89_core_tx_kick_off_and_wait()
+  /* assign skb_data->wait but too late */
+
+Found by Linux Verification Center (linuxtesting.org).
+
+Fixes: 1ae5ca615285 ("wifi: rtw89: add function to wait for completion of TX skbs")
+Cc: stable@vger.kernel.org
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Acked-by: Ping-Ke Shih <pkshih@realtek.com>
+Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
+Link: https://patch.msgid.link/20250919210852.823912-3-pchelkin@ispras.ru
+[ adapted rtw89_core_tx_write_link() modifications to rtw89_core_tx_write() ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wireless/realtek/rtw89/core.c     |   39 ++++++++++++++------------
+ drivers/net/wireless/realtek/rtw89/core.h     |    6 ++--
+ drivers/net/wireless/realtek/rtw89/mac80211.c |    2 -
+ drivers/net/wireless/realtek/rtw89/pci.c      |    2 -
+ 4 files changed, 26 insertions(+), 23 deletions(-)
+
+--- a/drivers/net/wireless/realtek/rtw89/core.c
++++ b/drivers/net/wireless/realtek/rtw89/core.c
+@@ -978,25 +978,14 @@ void rtw89_core_tx_kick_off(struct rtw89
+ }
+ 
+ int rtw89_core_tx_kick_off_and_wait(struct rtw89_dev *rtwdev, struct sk_buff *skb,
+-                                  int qsel, unsigned int timeout)
++                                  struct rtw89_tx_wait_info *wait, int qsel,
++                                  unsigned int timeout)
+ {
+-      struct rtw89_tx_skb_data *skb_data = RTW89_TX_SKB_CB(skb);
+-      struct rtw89_tx_wait_info *wait;
+       unsigned long time_left;
+       int ret = 0;
+ 
+       lockdep_assert_wiphy(rtwdev->hw->wiphy);
+ 
+-      wait = kzalloc(sizeof(*wait), GFP_KERNEL);
+-      if (!wait) {
+-              rtw89_core_tx_kick_off(rtwdev, qsel);
+-              return 0;
+-      }
+-
+-      init_completion(&wait->completion);
+-      wait->skb = skb;
+-      rcu_assign_pointer(skb_data->wait, wait);
+-
+       rtw89_core_tx_kick_off(rtwdev, qsel);
+       time_left = wait_for_completion_timeout(&wait->completion,
+                                               msecs_to_jiffies(timeout));
+@@ -1057,10 +1046,12 @@ int rtw89_h2c_tx(struct rtw89_dev *rtwde
+ }
+ 
+ int rtw89_core_tx_write(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif,
+-                      struct ieee80211_sta *sta, struct sk_buff *skb, int *qsel)
++                      struct ieee80211_sta *sta, struct sk_buff *skb, int *qsel,
++                      struct rtw89_tx_wait_info *wait)
+ {
+       struct rtw89_sta *rtwsta = sta_to_rtwsta_safe(sta);
+       struct rtw89_vif *rtwvif = vif_to_rtwvif(vif);
++      struct rtw89_tx_skb_data *skb_data = RTW89_TX_SKB_CB(skb);
+       struct rtw89_core_tx_request tx_req = {0};
+       struct rtw89_sta_link *rtwsta_link = NULL;
+       struct rtw89_vif_link *rtwvif_link;
+@@ -1093,6 +1084,8 @@ int rtw89_core_tx_write(struct rtw89_dev
+       rtw89_core_tx_update_desc_info(rtwdev, &tx_req);
+       rtw89_core_tx_wake(rtwdev, &tx_req);
+ 
++      rcu_assign_pointer(skb_data->wait, wait);
++
+       ret = rtw89_hci_tx_write(rtwdev, &tx_req);
+       if (ret) {
+               rtw89_err(rtwdev, "failed to transmit skb to HCI\n");
+@@ -2908,7 +2901,7 @@ static void rtw89_core_txq_push(struct r
+                       goto out;
+               }
+               rtw89_core_txq_check_agg(rtwdev, rtwtxq, skb);
+-              ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, NULL);
++              ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, NULL, NULL);
+               if (ret) {
+                       rtw89_err(rtwdev, "failed to push txq: %d\n", ret);
+                       ieee80211_free_txskb(rtwdev->hw, skb);
+@@ -3084,7 +3077,7 @@ bottom:
+       skb_queue_walk_safe(&rtwsta->roc_queue, skb, tmp) {
+               skb_unlink(skb, &rtwsta->roc_queue);
+ 
+-              ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, &qsel);
++              ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, &qsel, NULL);
+               if (ret) {
+                       rtw89_warn(rtwdev, "pending tx failed with %d\n", ret);
+                       dev_kfree_skb_any(skb);
+@@ -3106,6 +3099,7 @@ static int rtw89_core_send_nullfunc(stru
+                                   struct rtw89_vif_link *rtwvif_link, bool qos, bool ps)
+ {
+       struct ieee80211_vif *vif = rtwvif_link_to_vif(rtwvif_link);
++      struct rtw89_tx_wait_info *wait;
+       struct ieee80211_sta *sta;
+       struct ieee80211_hdr *hdr;
+       struct sk_buff *skb;
+@@ -3114,6 +3108,12 @@ static int rtw89_core_send_nullfunc(stru
+       if (vif->type != NL80211_IFTYPE_STATION || !vif->cfg.assoc)
+               return 0;
+ 
++      wait = kzalloc(sizeof(*wait), GFP_KERNEL);
++      if (!wait)
++              return -ENOMEM;
++
++      init_completion(&wait->completion);
++
+       rcu_read_lock();
+       sta = ieee80211_find_sta(vif, vif->cfg.ap_addr);
+       if (!sta) {
+@@ -3127,11 +3127,13 @@ static int rtw89_core_send_nullfunc(stru
+               goto out;
+       }
+ 
++      wait->skb = skb;
++
+       hdr = (struct ieee80211_hdr *)skb->data;
+       if (ps)
+               hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM);
+ 
+-      ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, &qsel);
++      ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, &qsel, wait);
+       if (ret) {
+               rtw89_warn(rtwdev, "nullfunc transmit failed: %d\n", ret);
+               dev_kfree_skb_any(skb);
+@@ -3140,10 +3142,11 @@ static int rtw89_core_send_nullfunc(stru
+ 
+       rcu_read_unlock();
+ 
+-      return rtw89_core_tx_kick_off_and_wait(rtwdev, skb, qsel,
++      return rtw89_core_tx_kick_off_and_wait(rtwdev, skb, wait, qsel,
+                                              RTW89_ROC_TX_TIMEOUT);
+ out:
+       rcu_read_unlock();
++      kfree(wait);
+ 
+       return ret;
+ }
+--- a/drivers/net/wireless/realtek/rtw89/core.h
++++ b/drivers/net/wireless/realtek/rtw89/core.h
+@@ -6818,12 +6818,14 @@ static inline bool rtw89_is_rtl885xb(str
+ }
+ 
+ int rtw89_core_tx_write(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif,
+-                      struct ieee80211_sta *sta, struct sk_buff *skb, int *qsel);
++                      struct ieee80211_sta *sta, struct sk_buff *skb, int *qsel,
++                      struct rtw89_tx_wait_info *wait);
+ int rtw89_h2c_tx(struct rtw89_dev *rtwdev,
+                struct sk_buff *skb, bool fwdl);
+ void rtw89_core_tx_kick_off(struct rtw89_dev *rtwdev, u8 qsel);
+ int rtw89_core_tx_kick_off_and_wait(struct rtw89_dev *rtwdev, struct sk_buff *skb,
+-                                  int qsel, unsigned int timeout);
++                                  struct rtw89_tx_wait_info *wait, int qsel,
++                                  unsigned int timeout);
+ void rtw89_core_fill_txdesc(struct rtw89_dev *rtwdev,
+                           struct rtw89_tx_desc_info *desc_info,
+                           void *txdesc);
+--- a/drivers/net/wireless/realtek/rtw89/mac80211.c
++++ b/drivers/net/wireless/realtek/rtw89/mac80211.c
+@@ -36,7 +36,7 @@ static void rtw89_ops_tx(struct ieee8021
+               return;
+       }
+ 
+-      ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, &qsel);
++      ret = rtw89_core_tx_write(rtwdev, vif, sta, skb, &qsel, NULL);
+       if (ret) {
+               rtw89_err(rtwdev, "failed to transmit skb: %d\n", ret);
+               ieee80211_free_txskb(hw, skb);
+--- a/drivers/net/wireless/realtek/rtw89/pci.c
++++ b/drivers/net/wireless/realtek/rtw89/pci.c
+@@ -1366,7 +1366,6 @@ static int rtw89_pci_txwd_submit(struct
+       struct pci_dev *pdev = rtwpci->pdev;
+       struct sk_buff *skb = tx_req->skb;
+       struct rtw89_pci_tx_data *tx_data = RTW89_PCI_TX_SKB_CB(skb);
+-      struct rtw89_tx_skb_data *skb_data = RTW89_TX_SKB_CB(skb);
+       bool en_wd_info = desc_info->en_wd_info;
+       u32 txwd_len;
+       u32 txwp_len;
+@@ -1382,7 +1381,6 @@ static int rtw89_pci_txwd_submit(struct
+       }
+ 
+       tx_data->dma = dma;
+-      rcu_assign_pointer(skb_data->wait, NULL);
+ 
+       txwp_len = sizeof(*txwp_info);
+       txwd_len = chip->txwd_body_size;
diff --git a/queue-6.12/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch b/queue-6.12/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch

new file mode 100644 (file)

index 0000000..78805c8
--- /dev/null
+++ b/queue-6.12/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch
@@ -0,0 +1,149 @@
+From stable+bounces-188200-greg=kroah.com@vger.kernel.org Mon Oct 20 18:41:02 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 12:38:53 -0400
+Subject: x86/resctrl: Fix miscount of bandwidth event when reactivating previously unavailable RMID
+To: stable@vger.kernel.org
+Cc: Babu Moger <babu.moger@amd.com>, "Borislav Petkov (AMD)" <bp@alien8.de>, Reinette Chatre <reinette.chatre@intel.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020163853.1841192-2-sashal@kernel.org>
+
+From: Babu Moger <babu.moger@amd.com>
+
+[ Upstream commit 15292f1b4c55a3a7c940dbcb6cb8793871ed3d92 ]
+
+Users can create as many monitoring groups as the number of RMIDs supported
+by the hardware. However, on AMD systems, only a limited number of RMIDs
+are guaranteed to be actively tracked by the hardware. RMIDs that exceed
+this limit are placed in an "Unavailable" state.
+
+When a bandwidth counter is read for such an RMID, the hardware sets
+MSR_IA32_QM_CTR.Unavailable (bit 62). When such an RMID starts being tracked
+again the hardware counter is reset to zero. MSR_IA32_QM_CTR.Unavailable
+remains set on first read after tracking re-starts and is clear on all
+subsequent reads as long as the RMID is tracked.
+
+resctrl miscounts the bandwidth events after an RMID transitions from the
+"Unavailable" state back to being tracked. This happens because when the
+hardware starts counting again after resetting the counter to zero, resctrl
+in turn compares the new count against the counter value stored from the
+previous time the RMID was tracked.
+
+This results in resctrl computing an event value that is either undercounting
+(when new counter is more than stored counter) or a mistaken overflow (when
+new counter is less than stored counter).
+
+Reset the stored value (arch_mbm_state::prev_msr) of MSR_IA32_QM_CTR to
+zero whenever the RMID is in the "Unavailable" state to ensure accurate
+counting after the RMID resets to zero when it starts to be tracked again.
+
+Example scenario that results in mistaken overflow
+==================================================
+1. The resctrl filesystem is mounted, and a task is assigned to a
+   monitoring group.
+
+   $mount -t resctrl resctrl /sys/fs/resctrl
+   $mkdir /sys/fs/resctrl/mon_groups/test1/
+   $echo 1234 > /sys/fs/resctrl/mon_groups/test1/tasks
+
+   $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes
+   21323            <- Total bytes on domain 0
+   "Unavailable"    <- Total bytes on domain 1
+
+   Task is running on domain 0. Counter on domain 1 is "Unavailable".
+
+2. The task runs on domain 0 for a while and then moves to domain 1. The
+   counter starts incrementing on domain 1.
+
+   $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes
+   7345357          <- Total bytes on domain 0
+   4545             <- Total bytes on domain 1
+
+3. At some point, the RMID in domain 0 transitions to the "Unavailable"
+   state because the task is no longer executing in that domain.
+
+   $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes
+   "Unavailable"    <- Total bytes on domain 0
+   434341           <- Total bytes on domain 1
+
+4.  Since the task continues to migrate between domains, it may eventually
+    return to domain 0.
+
+    $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes
+    17592178699059  <- Overflow on domain 0
+    3232332         <- Total bytes on domain 1
+
+In this case, the RMID on domain 0 transitions from "Unavailable" state to
+active state. The hardware sets MSR_IA32_QM_CTR.Unavailable (bit 62) when
+the counter is read and begins tracking the RMID counting from 0.
+
+Subsequent reads succeed but return a value smaller than the previously
+saved MSR value (7345357). Consequently, resctrl's overflow logic is
+triggered: it compares the previous value (7345357) with the new, smaller
+value and incorrectly interprets this as a counter overflow, adding a large
+delta.
+
+In reality, this is a false positive: the counter did not overflow but was
+simply reset when the RMID transitioned from "Unavailable" back to active
+state.
+
+Here is the text from APM [1] available from [2].
+
+"In PQOS Version 2.0 or higher, the MBM hardware will set the U bit on the
+first QM_CTR read when it begins tracking an RMID that it was not
+previously tracking. The U bit will be zero for all subsequent reads from
+that RMID while it is still tracked by the hardware. Therefore, a QM_CTR
+read with the U bit set when that RMID is in use by a processor can be
+considered 0 when calculating the difference with a subsequent read."
+
+[1] AMD64 Architecture Programmer's Manual Volume 2: System Programming
+    Publication # 24593 Revision 3.41 section 19.3.3 Monitoring L3 Memory
+    Bandwidth (MBM).
+
+  [ bp: Split commit message into smaller paragraph chunks for better
+    consumption. ]
+
+Fixes: 4d05bf71f157d ("x86/resctrl: Introduce AMD QOS feature")
+Signed-off-by: Babu Moger <babu.moger@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
+Tested-by: Reinette Chatre <reinette.chatre@intel.com>
+Cc: stable@vger.kernel.org # needs adjustments for <= v6.17
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/resctrl/monitor.c |   14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kernel/cpu/resctrl/monitor.c
++++ b/arch/x86/kernel/cpu/resctrl/monitor.c
+@@ -337,7 +337,9 @@ int resctrl_arch_rmid_read(struct rdt_re
+                          u32 unused, u32 rmid, enum resctrl_event_id eventid,
+                          u64 *val, void *ignored)
+ {
++      struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
+       int cpu = cpumask_any(&d->hdr.cpu_mask);
++      struct arch_mbm_state *am;
+       u64 msr_val;
+       u32 prmid;
+       int ret;
+@@ -346,12 +348,16 @@ int resctrl_arch_rmid_read(struct rdt_re
+ 
+       prmid = logical_rmid_to_physical_rmid(cpu, rmid);
+       ret = __rmid_read_phys(prmid, eventid, &msr_val);
+-      if (ret)
+-              return ret;
+ 
+-      *val = get_corrected_val(r, d, rmid, eventid, msr_val);
++      if (!ret) {
++              *val = get_corrected_val(r, d, rmid, eventid, msr_val);
++      } else if (ret == -EINVAL) {
++              am = get_arch_mbm_state(hw_dom, rmid, eventid);
++              if (am)
++                      am->prev_msr = 0;
++      }
+ 
+-      return 0;
++      return ret;
+ }
+ 
+ static void limbo_release_entry(struct rmid_entry *entry)
diff --git a/queue-6.12/x86-resctrl-refactor-resctrl_arch_rmid_read.patch b/queue-6.12/x86-resctrl-refactor-resctrl_arch_rmid_read.patch

new file mode 100644 (file)

index 0000000..ab0f565
--- /dev/null
+++ b/queue-6.12/x86-resctrl-refactor-resctrl_arch_rmid_read.patch
@@ -0,0 +1,89 @@
+From stable+bounces-188199-greg=kroah.com@vger.kernel.org Mon Oct 20 18:40:59 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 12:38:52 -0400
+Subject: x86/resctrl: Refactor resctrl_arch_rmid_read()
+To: stable@vger.kernel.org
+Cc: Babu Moger <babu.moger@amd.com>, "Borislav Petkov (AMD)" <bp@alien8.de>, Reinette Chatre <reinette.chatre@intel.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020163853.1841192-1-sashal@kernel.org>
+
+From: Babu Moger <babu.moger@amd.com>
+
+[ Upstream commit 7c9ac605e202c4668e441fc8146a993577131ca1 ]
+
+resctrl_arch_rmid_read() adjusts the value obtained from MSR_IA32_QM_CTR to
+account for the overflow for MBM events and apply counter scaling for all the
+events. This logic is common to both reading an RMID and reading a hardware
+counter directly.
+
+Refactor the hardware value adjustment logic into get_corrected_val() to
+prepare for support of reading a hardware counter.
+
+Signed-off-by: Babu Moger <babu.moger@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
+Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com
+Stable-dep-of: 15292f1b4c55 ("x86/resctrl: Fix miscount of bandwidth event when reactivating previously unavailable RMID")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/resctrl/monitor.c |   38 ++++++++++++++++++++--------------
+ 1 file changed, 23 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/kernel/cpu/resctrl/monitor.c
++++ b/arch/x86/kernel/cpu/resctrl/monitor.c
+@@ -312,24 +312,13 @@ static u64 mbm_overflow_count(u64 prev_m
+       return chunks >> shift;
+ }
+ 
+-int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
+-                         u32 unused, u32 rmid, enum resctrl_event_id eventid,
+-                         u64 *val, void *ignored)
++static u64 get_corrected_val(struct rdt_resource *r, struct rdt_mon_domain *d,
++                           u32 rmid, enum resctrl_event_id eventid, u64 msr_val)
+ {
+       struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
+       struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+-      int cpu = cpumask_any(&d->hdr.cpu_mask);
+       struct arch_mbm_state *am;
+-      u64 msr_val, chunks;
+-      u32 prmid;
+-      int ret;
+-
+-      resctrl_arch_rmid_read_context_check();
+-
+-      prmid = logical_rmid_to_physical_rmid(cpu, rmid);
+-      ret = __rmid_read_phys(prmid, eventid, &msr_val);
+-      if (ret)
+-              return ret;
++      u64 chunks;
+ 
+       am = get_arch_mbm_state(hw_dom, rmid, eventid);
+       if (am) {
+@@ -341,7 +330,26 @@ int resctrl_arch_rmid_read(struct rdt_re
+               chunks = msr_val;
+       }
+ 
+-      *val = chunks * hw_res->mon_scale;
++      return chunks * hw_res->mon_scale;
++}
++
++int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
++                         u32 unused, u32 rmid, enum resctrl_event_id eventid,
++                         u64 *val, void *ignored)
++{
++      int cpu = cpumask_any(&d->hdr.cpu_mask);
++      u64 msr_val;
++      u32 prmid;
++      int ret;
++
++      resctrl_arch_rmid_read_context_check();
++
++      prmid = logical_rmid_to_physical_rmid(cpu, rmid);
++      ret = __rmid_read_phys(prmid, eventid, &msr_val);
++      if (ret)
++              return ret;
++
++      *val = get_corrected_val(r, d, rmid, eventid, msr_val);
+ 
+       return 0;
+ }
diff --git a/queue-6.12/xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch b/queue-6.12/xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch

new file mode 100644 (file)

index 0000000..7511919
--- /dev/null
+++ b/queue-6.12/xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch
@@ -0,0 +1,172 @@
+From stable+bounces-188052-greg=kroah.com@vger.kernel.org Mon Oct 20 14:46:47 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 08:43:58 -0400
+Subject: xfs: fix log CRC mismatches between i386 and other architectures
+To: stable@vger.kernel.org
+Cc: Christoph Hellwig <hch@lst.de>, Carlos Maiolino <cem@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020124358.1756227-2-sashal@kernel.org>
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit e747883c7d7306acb4d683038d881528fbfbe749 ]
+
+When mounting file systems with a log that was dirtied on i386 on
+other architectures or vice versa, log recovery is unhappy:
+
+[   11.068052] XFS (vdb): Torn write (CRC failure) detected at log block 0x2. Truncating head block from 0xc.
+
+This is because the CRCs generated by i386 and other architectures
+always differ.  The reason for that is that sizeof(struct xlog_rec_header)
+returns different values for i386 vs the rest (324 vs 328), because the
+struct is not sizeof(uint64_t) aligned, and i386 has odd struct size
+alignment rules.
+
+This issue goes back to commit 13cdc853c519 ("Add log versioning, and new
+super block field for the log stripe") in the xfs-import tree, which
+adds log v2 support and the h_size field that causes the unaligned size.
+At that time it only mattered for the crude debug only log header
+checksum, but with commit 0e446be44806 ("xfs: add CRC checks to the log")
+it became a real issue for v5 file systems, because now there is a proper
+CRC, and regular builds actually expect it to match.
+
+Fix this by allowing checksums with and without the padding.
+
+Fixes: 0e446be44806 ("xfs: add CRC checks to the log")
+Cc: <stable@vger.kernel.org> # v3.8
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_log_format.h |   30 +++++++++++++++++++++++++++++-
+ fs/xfs/libxfs/xfs_ondisk.h     |    2 ++
+ fs/xfs/xfs_log.c               |    8 ++++----
+ fs/xfs/xfs_log_priv.h          |    4 ++--
+ fs/xfs/xfs_log_recover.c       |   19 +++++++++++++++++--
+ 5 files changed, 54 insertions(+), 9 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_log_format.h
++++ b/fs/xfs/libxfs/xfs_log_format.h
+@@ -174,12 +174,40 @@ typedef struct xlog_rec_header {
+       __be32    h_prev_block; /* block number to previous LR          :  4 */
+       __be32    h_num_logops; /* number of log operations in this LR  :  4 */
+       __be32    h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
+-      /* new fields */
++
++      /* fields added by the Linux port: */
+       __be32    h_fmt;        /* format of log record                 :  4 */
+       uuid_t    h_fs_uuid;    /* uuid of FS                           : 16 */
++
++      /* fields added for log v2: */
+       __be32    h_size;       /* iclog size                           :  4 */
++
++      /*
++       * When h_size added for log v2 support, it caused structure to have
++       * a different size on i386 vs all other architectures because the
++       * sum of the sizes of the members is not aligned by that of the largest
++       * __be64-sized member, and i386 has really odd struct alignment rules.
++       *
++       * Due to the way the log headers are placed out on-disk that alone is
++       * not a problem because the xlog_rec_header always sits alone in a
++       * BBSIZEs area, and the rest of that area is padded with zeroes.
++       * But xlog_cksum used to calculate the checksum based on the structure
++       * size, and thus gives different checksums for i386 vs the rest.
++       * We now do two checksum validation passes for both sizes to allow
++       * moving v5 file systems with unclean logs between i386 and other
++       * (little-endian) architectures.
++       */
++      __u32     h_pad0;
+ } xlog_rec_header_t;
+ 
++#ifdef __i386__
++#define XLOG_REC_SIZE         offsetofend(struct xlog_rec_header, h_size)
++#define XLOG_REC_SIZE_OTHER   sizeof(struct xlog_rec_header)
++#else
++#define XLOG_REC_SIZE         sizeof(struct xlog_rec_header)
++#define XLOG_REC_SIZE_OTHER   offsetofend(struct xlog_rec_header, h_size)
++#endif /* __i386__ */
++
+ typedef struct xlog_rec_ext_header {
+       __be32    xh_cycle;     /* write cycle of log                   : 4 */
+       __be32    xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /*    : 256 */
+--- a/fs/xfs/libxfs/xfs_ondisk.h
++++ b/fs/xfs/libxfs/xfs_ondisk.h
+@@ -149,6 +149,8 @@ xfs_check_ondisk_structs(void)
+       XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format,        16);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent,            32);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent,           16);
++      XFS_CHECK_STRUCT_SIZE(struct xlog_rec_header,           328);
++      XFS_CHECK_STRUCT_SIZE(struct xlog_rec_ext_header,       260);
+ 
+       XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents,        16);
+       XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents,        16);
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -1567,13 +1567,13 @@ xlog_cksum(
+       struct xlog             *log,
+       struct xlog_rec_header  *rhead,
+       char                    *dp,
+-      int                     size)
++      unsigned int            hdrsize,
++      unsigned int            size)
+ {
+       uint32_t                crc;
+ 
+       /* first generate the crc for the record header ... */
+-      crc = xfs_start_cksum_update((char *)rhead,
+-                            sizeof(struct xlog_rec_header),
++      crc = xfs_start_cksum_update((char *)rhead, hdrsize,
+                             offsetof(struct xlog_rec_header, h_crc));
+ 
+       /* ... then for additional cycle data for v2 logs ... */
+@@ -1837,7 +1837,7 @@ xlog_sync(
+ 
+       /* calculcate the checksum */
+       iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
+-                                          iclog->ic_datap, size);
++                      iclog->ic_datap, XLOG_REC_SIZE, size);
+       /*
+        * Intentionally corrupt the log record CRC based on the error injection
+        * frequency, if defined. This facilitates testing log recovery in the
+--- a/fs/xfs/xfs_log_priv.h
++++ b/fs/xfs/xfs_log_priv.h
+@@ -498,8 +498,8 @@ xlog_recover_finish(
+ extern void
+ xlog_recover_cancel(struct xlog *);
+ 
+-extern __le32  xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
+-                          char *dp, int size);
++__le32         xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
++              char *dp, unsigned int hdrsize, unsigned int size);
+ 
+ extern struct kmem_cache *xfs_log_ticket_cache;
+ struct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes,
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -2890,9 +2890,24 @@ xlog_recover_process(
+       int                     pass,
+       struct list_head        *buffer_list)
+ {
+-      __le32                  expected_crc = rhead->h_crc, crc;
++      __le32                  expected_crc = rhead->h_crc, crc, other_crc;
+ 
+-      crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
++      crc = xlog_cksum(log, rhead, dp, XLOG_REC_SIZE,
++                      be32_to_cpu(rhead->h_len));
++
++      /*
++       * Look at the end of the struct xlog_rec_header definition in
++       * xfs_log_format.h for the glory details.
++       */
++      if (expected_crc && crc != expected_crc) {
++              other_crc = xlog_cksum(log, rhead, dp, XLOG_REC_SIZE_OTHER,
++                              be32_to_cpu(rhead->h_len));
++              if (other_crc == expected_crc) {
++                      xfs_notice_once(log->l_mp,
++      "Fixing up incorrect CRC due to padding.");
++                      crc = other_crc;
++              }
++      }
+ 
+       /*
+        * Nothing else to do if this is a CRC verification pass. Just return
diff --git a/queue-6.12/xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch b/queue-6.12/xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch

new file mode 100644 (file)

index 0000000..2607728
--- /dev/null
+++ b/queue-6.12/xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch
@@ -0,0 +1,68 @@
+From stable+bounces-188051-greg=kroah.com@vger.kernel.org Mon Oct 20 14:44:07 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 08:43:57 -0400
+Subject: xfs: rename the old_crc variable in xlog_recover_process
+To: stable@vger.kernel.org
+Cc: Christoph Hellwig <hch@lst.de>, "Darrick J. Wong" <djwong@kernel.org>, Carlos Maiolino <cem@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020124358.1756227-1-sashal@kernel.org>
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 0b737f4ac1d3ec093347241df74bbf5f54a7e16c ]
+
+old_crc is a very misleading name.  Rename it to expected_crc as that
+describes the usage much better.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Stable-dep-of: e747883c7d73 ("xfs: fix log CRC mismatches between i386 and other architectures")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log_recover.c |   17 ++++++++---------
+ 1 file changed, 8 insertions(+), 9 deletions(-)
+
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -2890,20 +2890,19 @@ xlog_recover_process(
+       int                     pass,
+       struct list_head        *buffer_list)
+ {
+-      __le32                  old_crc = rhead->h_crc;
+-      __le32                  crc;
++      __le32                  expected_crc = rhead->h_crc, crc;
+ 
+       crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
+ 
+       /*
+        * Nothing else to do if this is a CRC verification pass. Just return
+        * if this a record with a non-zero crc. Unfortunately, mkfs always
+-       * sets old_crc to 0 so we must consider this valid even on v5 supers.
+-       * Otherwise, return EFSBADCRC on failure so the callers up the stack
+-       * know precisely what failed.
++       * sets expected_crc to 0 so we must consider this valid even on v5
++       * supers.  Otherwise, return EFSBADCRC on failure so the callers up the
++       * stack know precisely what failed.
+        */
+       if (pass == XLOG_RECOVER_CRCPASS) {
+-              if (old_crc && crc != old_crc)
++              if (expected_crc && crc != expected_crc)
+                       return -EFSBADCRC;
+               return 0;
+       }
+@@ -2914,11 +2913,11 @@ xlog_recover_process(
+        * zero CRC check prevents warnings from being emitted when upgrading
+        * the kernel from one that does not add CRCs by default.
+        */
+-      if (crc != old_crc) {
+-              if (old_crc || xfs_has_crc(log->l_mp)) {
++      if (crc != expected_crc) {
++              if (expected_crc || xfs_has_crc(log->l_mp)) {
+                       xfs_alert(log->l_mp,
+               "log record CRC mismatch: found 0x%x, expected 0x%x.",
+-                                      le32_to_cpu(old_crc),
++                                      le32_to_cpu(expected_crc),
+                                       le32_to_cpu(crc));
+                       xfs_hex_dump(dp, 32);
+               }
diff --git a/queue-6.12/xfs-use-deferred-intent-items-for-reaping-crosslinked-blocks.patch b/queue-6.12/xfs-use-deferred-intent-items-for-reaping-crosslinked-blocks.patch

new file mode 100644 (file)

index 0000000..e52c520
--- /dev/null
+++ b/queue-6.12/xfs-use-deferred-intent-items-for-reaping-crosslinked-blocks.patch
@@ -0,0 +1,55 @@
+From stable+bounces-188157-greg=kroah.com@vger.kernel.org Mon Oct 20 17:49:59 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Oct 2025 11:49:51 -0400
+Subject: xfs: use deferred intent items for reaping crosslinked blocks
+To: stable@vger.kernel.org
+Cc: "Darrick J. Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251020154951.1825215-1-sashal@kernel.org>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit cd32a0c0dcdf634f2e0e71f41c272e19dece6264 ]
+
+When we're removing rmap records for crosslinked blocks, use deferred
+intent items so that we can try to free/unmap as many of the old data
+structure's blocks as we can in the same transaction as the commit.
+
+Cc: <stable@vger.kernel.org> # v6.6
+Fixes: 1c7ce115e52106 ("xfs: reap large AG metadata extents when possible")
+Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+[ adapted xfs_refcount_free_cow_extent() and xfs_rmap_free_extent() ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/scrub/reap.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/fs/xfs/scrub/reap.c
++++ b/fs/xfs/scrub/reap.c
+@@ -409,8 +409,6 @@ xreap_agextent_iter(
+       if (crosslinked) {
+               trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp);
+ 
+-              rs->force_roll = true;
+-
+               if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
+                       /*
+                        * If we're unmapping CoW staging extents, remove the
+@@ -418,11 +416,14 @@ xreap_agextent_iter(
+                        * rmap record as well.
+                        */
+                       xfs_refcount_free_cow_extent(sc->tp, fsbno, *aglenp);
++                      rs->force_roll = true;
+                       return 0;
+               }
+ 
+-              return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
+-                              *aglenp, rs->oinfo);
++              xfs_rmap_free_extent(sc->tp, sc->sa.pag->pag_agno, agbno,
++                              *aglenp, rs->oinfo->oi_owner);
++              rs->deferred++;
++              return 0;
+       }
+ 
+       trace_xreap_dispose_free_extent(sc->sa.pag, agbno, *aglenp);
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 21 Oct 2025 18:01:08 +0000 (20:01 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 21 Oct 2025 18:01:08 +0000 (20:01 +0200)
queue-6.12/d_alloc_parallel-set-dcache_par_lookup-earlier.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_strcasecmp.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/iio-imu-inv_icm42600-avoid-configuring-if-already-pm_runtime-suspended.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/iio-imu-inv_icm42600-simplify-pm_runtime-setup.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/ipv4-adopt-dst_dev-skb_dst_dev-and-skb_dst_dev_net.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/ixgbevf-add-support-for-intel-r-e610-device.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/ixgbevf-fix-getting-link-speed-data-for-e610-devices.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/ixgbevf-fix-mailbox-api-compatibility-by-negotiating-supported-features.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/md-fix-mssing-blktrace-bio-split-events.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/md-raid0-handle-bio_split-errors.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/md-raid1-handle-bio_split-errors.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/md-raid10-handle-bio_split-errors.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/mptcp-call-dst_release-in-mptcp_active_enable.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/mptcp-reset-blackhole-on-success-with-non-loopback-ifaces.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/mptcp-use-__sk_dst_get-and-dst_dev_rcu-in-mptcp_active_enable.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/net-add-locking-to-protect-skb-dev-access-in-ip_output.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/net-dst-add-four-helpers-to-annotate-data-races-around-dst-dev.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/nfsd-define-a-proc_layoutcommit-for-the-flexfiles-layout-type.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/nfsd-drop-dprintk-in-blocklayout-xdr-functions.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/nfsd-fix-last-write-offset-handling-in-layoutcommit.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/nfsd-implement-large-extent-array-support-in-pnfs.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/nfsd-minor-cleanup-in-layoutcommit-processing.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/nfsd-rework-encoding-and-decoding-of-nfsd4_deviceid.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/nfsd-use-correct-error-code-when-decoding-extents.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/padata-reset-next-cpu-when-reorder-sequence-wraps-around.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/pci-add-pci_vdevice_sub-helper-macro.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/phy-cadence-cdns-dphy-fix-pll-lock-and-o_cmn_ready-polling.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/phy-cadence-cdns-dphy-update-calibration-wait-time-for-startup-state-machine.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/phy-cdns-dphy-store-hs_clk_rate-and-return-it.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/pm-runtime-add-new-devm-functions.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/series		patch \| blob \| blame \| history
queue-6.12/tcp-cache-rtax_quickack-metric-in-a-hot-cache-line.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/tcp-convert-to-dev_net_rcu.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/vfs-don-t-leak-disconnected-dentries-on-umount.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/wifi-rtw89-avoid-possible-tx-wait-initialization-race.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/x86-resctrl-refactor-resctrl_arch_rmid_read.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/xfs-fix-log-crc-mismatches-between-i386-and-other-architectures.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/xfs-rename-the-old_crc-variable-in-xlog_recover_process.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/xfs-use-deferred-intent-items-for-reaping-crosslinked-blocks.patch	[new file with mode: 0644]	patch \| blob