git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 5 Apr 2024 10:58:32 +0000 (12:58 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 5 Apr 2024 10:58:32 +0000 (12:58 +0200)
added patches:
erspan-make-sure-erspan_base_hdr-is-present-in-skb-head.patch
i40e-fix-i40e_count_filters-to-count-only-active-new-filters.patch
i40e-fix-vf-may-be-used-uninitialized-in-this-function-warning.patch
ipv6-fix-infinite-recursion-in-fib6_dump_done.patch
mlxbf_gige-stop-interface-during-shutdown.patch
octeontx2-af-fix-issue-with-loading-coalesced-kpu-profiles.patch
octeontx2-pf-check-negative-error-code-in-otx2_open.patch
selftests-reuseaddr_conflict-add-missing-new-line-at-the-end-of-the-output.patch
udp-do-not-accept-non-tunnel-gso-skbs-landing-in-a-tunnel.patch
udp-do-not-transition-udp-gro-fraglist-partial-checksums-to-unnecessary.patch
udp-prevent-local-udp-tunnel-packets-from-being-groed.patch

12 files changed:
queue-5.15/erspan-make-sure-erspan_base_hdr-is-present-in-skb-head.patch [new file with mode: 0644]
queue-5.15/i40e-fix-i40e_count_filters-to-count-only-active-new-filters.patch [new file with mode: 0644]
queue-5.15/i40e-fix-vf-may-be-used-uninitialized-in-this-function-warning.patch [new file with mode: 0644]
queue-5.15/ipv6-fix-infinite-recursion-in-fib6_dump_done.patch [new file with mode: 0644]
queue-5.15/mlxbf_gige-stop-interface-during-shutdown.patch [new file with mode: 0644]
queue-5.15/octeontx2-af-fix-issue-with-loading-coalesced-kpu-profiles.patch [new file with mode: 0644]
queue-5.15/octeontx2-pf-check-negative-error-code-in-otx2_open.patch [new file with mode: 0644]
queue-5.15/selftests-reuseaddr_conflict-add-missing-new-line-at-the-end-of-the-output.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/udp-do-not-accept-non-tunnel-gso-skbs-landing-in-a-tunnel.patch [new file with mode: 0644]
queue-5.15/udp-do-not-transition-udp-gro-fraglist-partial-checksums-to-unnecessary.patch [new file with mode: 0644]
queue-5.15/udp-prevent-local-udp-tunnel-packets-from-being-groed.patch [new file with mode: 0644]

diff --git a/queue-5.15/erspan-make-sure-erspan_base_hdr-is-present-in-skb-head.patch b/queue-5.15/erspan-make-sure-erspan_base_hdr-is-present-in-skb-head.patch
new file mode 100644 (file)
index 0000000..8b6b11c
--- /dev/null
@@ -0,0 +1,121 @@
+From 17af420545a750f763025149fa7b833a4fc8b8f0 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 28 Mar 2024 11:22:48 +0000
+Subject: erspan: make sure erspan_base_hdr is present in skb->head
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 17af420545a750f763025149fa7b833a4fc8b8f0 upstream.
+
+syzbot reported a problem in ip6erspan_rcv() [1]
+
+Issue is that ip6erspan_rcv() (and erspan_rcv()) no longer make
+sure erspan_base_hdr is present in skb linear part (skb->head)
+before getting @ver field from it.
+
+Add the missing pskb_may_pull() calls.
+
+v2: Reload iph pointer in erspan_rcv() after pskb_may_pull()
+    because skb->head might have changed.
+
+[1]
+
+ BUG: KMSAN: uninit-value in pskb_may_pull_reason include/linux/skbuff.h:2742 [inline]
+ BUG: KMSAN: uninit-value in pskb_may_pull include/linux/skbuff.h:2756 [inline]
+ BUG: KMSAN: uninit-value in ip6erspan_rcv net/ipv6/ip6_gre.c:541 [inline]
+ BUG: KMSAN: uninit-value in gre_rcv+0x11f8/0x1930 net/ipv6/ip6_gre.c:610
+  pskb_may_pull_reason include/linux/skbuff.h:2742 [inline]
+  pskb_may_pull include/linux/skbuff.h:2756 [inline]
+  ip6erspan_rcv net/ipv6/ip6_gre.c:541 [inline]
+  gre_rcv+0x11f8/0x1930 net/ipv6/ip6_gre.c:610
+  ip6_protocol_deliver_rcu+0x1d4c/0x2ca0 net/ipv6/ip6_input.c:438
+  ip6_input_finish net/ipv6/ip6_input.c:483 [inline]
+  NF_HOOK include/linux/netfilter.h:314 [inline]
+  ip6_input+0x15d/0x430 net/ipv6/ip6_input.c:492
+  ip6_mc_input+0xa7e/0xc80 net/ipv6/ip6_input.c:586
+  dst_input include/net/dst.h:460 [inline]
+  ip6_rcv_finish+0x955/0x970 net/ipv6/ip6_input.c:79
+  NF_HOOK include/linux/netfilter.h:314 [inline]
+  ipv6_rcv+0xde/0x390 net/ipv6/ip6_input.c:310
+  __netif_receive_skb_one_core net/core/dev.c:5538 [inline]
+  __netif_receive_skb+0x1da/0xa00 net/core/dev.c:5652
+  netif_receive_skb_internal net/core/dev.c:5738 [inline]
+  netif_receive_skb+0x58/0x660 net/core/dev.c:5798
+  tun_rx_batched+0x3ee/0x980 drivers/net/tun.c:1549
+  tun_get_user+0x5566/0x69e0 drivers/net/tun.c:2002
+  tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048
+  call_write_iter include/linux/fs.h:2108 [inline]
+  new_sync_write fs/read_write.c:497 [inline]
+  vfs_write+0xb63/0x1520 fs/read_write.c:590
+  ksys_write+0x20f/0x4c0 fs/read_write.c:643
+  __do_sys_write fs/read_write.c:655 [inline]
+  __se_sys_write fs/read_write.c:652 [inline]
+  __x64_sys_write+0x93/0xe0 fs/read_write.c:652
+ do_syscall_64+0xd5/0x1f0
+ entry_SYSCALL_64_after_hwframe+0x6d/0x75
+
+Uninit was created at:
+  slab_post_alloc_hook mm/slub.c:3804 [inline]
+  slab_alloc_node mm/slub.c:3845 [inline]
+  kmem_cache_alloc_node+0x613/0xc50 mm/slub.c:3888
+  kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:577
+  __alloc_skb+0x35b/0x7a0 net/core/skbuff.c:668
+  alloc_skb include/linux/skbuff.h:1318 [inline]
+  alloc_skb_with_frags+0xc8/0xbf0 net/core/skbuff.c:6504
+  sock_alloc_send_pskb+0xa81/0xbf0 net/core/sock.c:2795
+  tun_alloc_skb drivers/net/tun.c:1525 [inline]
+  tun_get_user+0x209a/0x69e0 drivers/net/tun.c:1846
+  tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048
+  call_write_iter include/linux/fs.h:2108 [inline]
+  new_sync_write fs/read_write.c:497 [inline]
+  vfs_write+0xb63/0x1520 fs/read_write.c:590
+  ksys_write+0x20f/0x4c0 fs/read_write.c:643
+  __do_sys_write fs/read_write.c:655 [inline]
+  __se_sys_write fs/read_write.c:652 [inline]
+  __x64_sys_write+0x93/0xe0 fs/read_write.c:652
+ do_syscall_64+0xd5/0x1f0
+ entry_SYSCALL_64_after_hwframe+0x6d/0x75
+
+CPU: 1 PID: 5045 Comm: syz-executor114 Not tainted 6.9.0-rc1-syzkaller-00021-g962490525cff #0
+
+Fixes: cb73ee40b1b3 ("net: ip_gre: use erspan key field for tunnel lookup")
+Reported-by: syzbot+1c1cf138518bf0c53d68@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/netdev/000000000000772f2c0614b66ef7@google.com/
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Lorenzo Bianconi <lorenzo@kernel.org>
+Link: https://lore.kernel.org/r/20240328112248.1101491-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_gre.c  |    5 +++++
+ net/ipv6/ip6_gre.c |    3 +++
+ 2 files changed, 8 insertions(+)
+
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -280,8 +280,13 @@ static int erspan_rcv(struct sk_buff *sk
+                                         tpi->flags | TUNNEL_NO_KEY,
+                                         iph->saddr, iph->daddr, 0);
+       } else {
++              if (unlikely(!pskb_may_pull(skb,
++                                          gre_hdr_len + sizeof(*ershdr))))
++                      return PACKET_REJECT;
++
+               ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
+               ver = ershdr->ver;
++              iph = ip_hdr(skb);
+               tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
+                                         tpi->flags | TUNNEL_KEY,
+                                         iph->saddr, iph->daddr, tpi->key);
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -533,6 +533,9 @@ static int ip6erspan_rcv(struct sk_buff
+       struct ip6_tnl *tunnel;
+       u8 ver;
++      if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
++              return PACKET_REJECT;
++
+       ipv6h = ipv6_hdr(skb);
+       ershdr = (struct erspan_base_hdr *)skb->data;
+       ver = ershdr->ver;
diff --git a/queue-5.15/i40e-fix-i40e_count_filters-to-count-only-active-new-filters.patch b/queue-5.15/i40e-fix-i40e_count_filters-to-count-only-active-new-filters.patch
new file mode 100644 (file)
index 0000000..0bd3777
--- /dev/null
@@ -0,0 +1,44 @@
+From eb58c598ce45b7e787568fe27016260417c3d807 Mon Sep 17 00:00:00 2001
+From: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
+Date: Wed, 13 Mar 2024 10:44:00 +0100
+Subject: i40e: fix i40e_count_filters() to count only active/new filters
+
+From: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
+
+commit eb58c598ce45b7e787568fe27016260417c3d807 upstream.
+
+The bug usually affects untrusted VFs: because they are limited to 18 MACs,
+it affects them badly, preventing them from creating all MAC filters.
+It is hard to reproduce reliably; it happens when a VF user creates MAC
+filters while other MACVLAN operations are happening in parallel.
+The consequence is that the VF can't receive the desired traffic.
+
+Fix counter to be bumped only for new or active filters.
+
+Fixes: 621650cabee5 ("i40e: Refactoring VF MAC filters counting to make more reliable")
+Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
+Reviewed-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
+Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_main.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -1231,8 +1231,11 @@ int i40e_count_filters(struct i40e_vsi *
+       int bkt;
+       int cnt = 0;
+-      hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
+-              ++cnt;
++      hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
++              if (f->state == I40E_FILTER_NEW ||
++                  f->state == I40E_FILTER_ACTIVE)
++                      ++cnt;
++      }
+       return cnt;
+ }
diff --git a/queue-5.15/i40e-fix-vf-may-be-used-uninitialized-in-this-function-warning.patch b/queue-5.15/i40e-fix-vf-may-be-used-uninitialized-in-this-function-warning.patch
new file mode 100644 (file)
index 0000000..da57110
--- /dev/null
@@ -0,0 +1,146 @@
+From f37c4eac99c258111d414d31b740437e1925b8e8 Mon Sep 17 00:00:00 2001
+From: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
+Date: Wed, 13 Mar 2024 10:56:39 +0100
+Subject: i40e: fix vf may be used uninitialized in this function warning
+
+From: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
+
+commit f37c4eac99c258111d414d31b740437e1925b8e8 upstream.
+
+Fix the regression introduced by commit 52424f974bc5, which causes
+servers to hang in very hard to reproduce conditions with reset races.
+Using two sources for the information is the root cause.
+In this function, before the fix, bumping v didn't mean bumping the vf
+pointer. But the code used these variables interchangeably, so a stale vf
+could point to a different/unintended vf.
+
+Remove redundant "v" variable and iterate via single VF pointer across
+whole function instead to guarantee VF pointer validity.
+
+Fixes: 52424f974bc5 ("i40e: Fix VF hang when reset is triggered on another VF")
+Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
+Reviewed-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c |   34 +++++++++------------
+ 1 file changed, 16 insertions(+), 18 deletions(-)
+
+--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+@@ -1626,8 +1626,8 @@ bool i40e_reset_all_vfs(struct i40e_pf *
+ {
+       struct i40e_hw *hw = &pf->hw;
+       struct i40e_vf *vf;
+-      int i, v;
+       u32 reg;
++      int i;
+       /* If we don't have any VFs, then there is nothing to reset */
+       if (!pf->num_alloc_vfs)
+@@ -1638,11 +1638,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *
+               return false;
+       /* Begin reset on all VFs at once */
+-      for (v = 0; v < pf->num_alloc_vfs; v++) {
+-              vf = &pf->vf[v];
++      for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
+               /* If VF is being reset no need to trigger reset again */
+               if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
+-                      i40e_trigger_vf_reset(&pf->vf[v], flr);
++                      i40e_trigger_vf_reset(vf, flr);
+       }
+       /* HW requires some time to make sure it can flush the FIFO for a VF
+@@ -1651,14 +1650,13 @@ bool i40e_reset_all_vfs(struct i40e_pf *
+        * the VFs using a simple iterator that increments once that VF has
+        * finished resetting.
+        */
+-      for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) {
++      for (i = 0, vf = &pf->vf[0]; i < 10 && vf < &pf->vf[pf->num_alloc_vfs]; ++i) {
+               usleep_range(10000, 20000);
+               /* Check each VF in sequence, beginning with the VF to fail
+                * the previous check.
+                */
+-              while (v < pf->num_alloc_vfs) {
+-                      vf = &pf->vf[v];
++              while (vf < &pf->vf[pf->num_alloc_vfs]) {
+                       if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) {
+                               reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id));
+                               if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK))
+@@ -1668,7 +1666,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *
+                       /* If the current VF has finished resetting, move on
+                        * to the next VF in sequence.
+                        */
+-                      v++;
++                      ++vf;
+               }
+       }
+@@ -1678,39 +1676,39 @@ bool i40e_reset_all_vfs(struct i40e_pf *
+       /* Display a warning if at least one VF didn't manage to reset in
+        * time, but continue on with the operation.
+        */
+-      if (v < pf->num_alloc_vfs)
++      if (vf < &pf->vf[pf->num_alloc_vfs])
+               dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n",
+-                      pf->vf[v].vf_id);
++                      vf->vf_id);
+       usleep_range(10000, 20000);
+       /* Begin disabling all the rings associated with VFs, but do not wait
+        * between each VF.
+        */
+-      for (v = 0; v < pf->num_alloc_vfs; v++) {
++      for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
+               /* On initial reset, we don't have any queues to disable */
+-              if (pf->vf[v].lan_vsi_idx == 0)
++              if (vf->lan_vsi_idx == 0)
+                       continue;
+               /* If VF is reset in another thread just continue */
+               if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
+                       continue;
+-              i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]);
++              i40e_vsi_stop_rings_no_wait(pf->vsi[vf->lan_vsi_idx]);
+       }
+       /* Now that we've notified HW to disable all of the VF rings, wait
+        * until they finish.
+        */
+-      for (v = 0; v < pf->num_alloc_vfs; v++) {
++      for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
+               /* On initial reset, we don't have any queues to disable */
+-              if (pf->vf[v].lan_vsi_idx == 0)
++              if (vf->lan_vsi_idx == 0)
+                       continue;
+               /* If VF is reset in another thread just continue */
+               if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
+                       continue;
+-              i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]);
++              i40e_vsi_wait_queues_disabled(pf->vsi[vf->lan_vsi_idx]);
+       }
+       /* Hw may need up to 50ms to finish disabling the RX queues. We
+@@ -1719,12 +1717,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *
+       mdelay(50);
+       /* Finish the reset on each VF */
+-      for (v = 0; v < pf->num_alloc_vfs; v++) {
++      for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
+               /* If VF is reset in another thread just continue */
+               if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
+                       continue;
+-              i40e_cleanup_reset_vf(&pf->vf[v]);
++              i40e_cleanup_reset_vf(vf);
+       }
+       i40e_flush(hw);
diff --git a/queue-5.15/ipv6-fix-infinite-recursion-in-fib6_dump_done.patch b/queue-5.15/ipv6-fix-infinite-recursion-in-fib6_dump_done.patch
new file mode 100644 (file)
index 0000000..780311e
--- /dev/null
@@ -0,0 +1,134 @@
+From d21d40605bca7bd5fc23ef03d4c1ca1f48bc2cae Mon Sep 17 00:00:00 2001
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+Date: Mon, 1 Apr 2024 14:10:04 -0700
+Subject: ipv6: Fix infinite recursion in fib6_dump_done().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+commit d21d40605bca7bd5fc23ef03d4c1ca1f48bc2cae upstream.
+
+syzkaller reported infinite recursive calls of fib6_dump_done() during
+netlink socket destruction.  [1]
+
+From the log, syzkaller sent an AF_UNSPEC RTM_GETROUTE message, and then
+the response was generated.  The following recvmmsg() resumed the dump
+for IPv6, but the first call of inet6_dump_fib() failed at kzalloc() due
+to the fault injection.  [0]
+
+  12:01:34 executing program 3:
+  r0 = socket$nl_route(0x10, 0x3, 0x0)
+  sendmsg$nl_route(r0, ... snip ...)
+  recvmmsg(r0, ... snip ...) (fail_nth: 8)
+
+Here, fib6_dump_done() was set to nlk_sk(sk)->cb.done, and the next call
+of inet6_dump_fib() set it to nlk_sk(sk)->cb.args[3].  syzkaller stopped
+receiving the response halfway through, and finally netlink_sock_destruct()
+called nlk_sk(sk)->cb.done().
+
+fib6_dump_done() calls fib6_dump_end() and nlk_sk(sk)->cb.done() if it
+is still not NULL.  fib6_dump_end() rewrites nlk_sk(sk)->cb.done() by
+nlk_sk(sk)->cb.args[3], but it has the same function, not NULL, calling
+itself recursively and hitting the stack guard page.
+
+To avoid the issue, let's set the destructor after kzalloc().
+
+[0]:
+FAULT_INJECTION: forcing a failure.
+name failslab, interval 1, probability 0, space 0, times 0
+CPU: 1 PID: 432110 Comm: syz-executor.3 Not tainted 6.8.0-12821-g537c2e91d354-dirty #11
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+Call Trace:
+ <TASK>
+ dump_stack_lvl (lib/dump_stack.c:117)
+ should_fail_ex (lib/fault-inject.c:52 lib/fault-inject.c:153)
+ should_failslab (mm/slub.c:3733)
+ kmalloc_trace (mm/slub.c:3748 mm/slub.c:3827 mm/slub.c:3992)
+ inet6_dump_fib (./include/linux/slab.h:628 ./include/linux/slab.h:749 net/ipv6/ip6_fib.c:662)
+ rtnl_dump_all (net/core/rtnetlink.c:4029)
+ netlink_dump (net/netlink/af_netlink.c:2269)
+ netlink_recvmsg (net/netlink/af_netlink.c:1988)
+ ____sys_recvmsg (net/socket.c:1046 net/socket.c:2801)
+ ___sys_recvmsg (net/socket.c:2846)
+ do_recvmmsg (net/socket.c:2943)
+ __x64_sys_recvmmsg (net/socket.c:3041 net/socket.c:3034 net/socket.c:3034)
+
+[1]:
+BUG: TASK stack guard page was hit at 00000000f2fa9af1 (stack is 00000000b7912430..000000009a436beb)
+stack guard page: 0000 [#1] PREEMPT SMP KASAN
+CPU: 1 PID: 223719 Comm: kworker/1:3 Not tainted 6.8.0-12821-g537c2e91d354-dirty #11
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+Workqueue: events netlink_sock_destruct_work
+RIP: 0010:fib6_dump_done (net/ipv6/ip6_fib.c:570)
+Code: 3c 24 e8 f3 e9 51 fd e9 28 fd ff ff 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 41 57 41 56 41 55 41 54 55 48 89 fd <53> 48 8d 5d 60 e8 b6 4d 07 fd 48 89 da 48 b8 00 00 00 00 00 fc ff
+RSP: 0018:ffffc9000d980000 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: ffffffff84405990 RCX: ffffffff844059d3
+RDX: ffff8881028e0000 RSI: ffffffff84405ac2 RDI: ffff88810c02f358
+RBP: ffff88810c02f358 R08: 0000000000000007 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000224 R12: 0000000000000000
+R13: ffff888007c82c78 R14: ffff888007c82c68 R15: ffff888007c82c68
+FS:  0000000000000000(0000) GS:ffff88811b100000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffffc9000d97fff8 CR3: 0000000102309002 CR4: 0000000000770ef0
+PKRU: 55555554
+Call Trace:
+ <#DF>
+ </#DF>
+ <TASK>
+ fib6_dump_done (net/ipv6/ip6_fib.c:572 (discriminator 1))
+ fib6_dump_done (net/ipv6/ip6_fib.c:572 (discriminator 1))
+ ...
+ fib6_dump_done (net/ipv6/ip6_fib.c:572 (discriminator 1))
+ fib6_dump_done (net/ipv6/ip6_fib.c:572 (discriminator 1))
+ netlink_sock_destruct (net/netlink/af_netlink.c:401)
+ __sk_destruct (net/core/sock.c:2177 (discriminator 2))
+ sk_destruct (net/core/sock.c:2224)
+ __sk_free (net/core/sock.c:2235)
+ sk_free (net/core/sock.c:2246)
+ process_one_work (kernel/workqueue.c:3259)
+ worker_thread (kernel/workqueue.c:3329 kernel/workqueue.c:3416)
+ kthread (kernel/kthread.c:388)
+ ret_from_fork (arch/x86/kernel/process.c:153)
+ ret_from_fork_asm (arch/x86/entry/entry_64.S:256)
+Modules linked in:
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: syzkaller <syzkaller@googlegroups.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20240401211003.25274-1-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_fib.c |   14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -645,19 +645,19 @@ static int inet6_dump_fib(struct sk_buff
+       if (!w) {
+               /* New dump:
+                *
+-               * 1. hook callback destructor.
+-               */
+-              cb->args[3] = (long)cb->done;
+-              cb->done = fib6_dump_done;
+-
+-              /*
+-               * 2. allocate and initialize walker.
++               * 1. allocate and initialize walker.
+                */
+               w = kzalloc(sizeof(*w), GFP_ATOMIC);
+               if (!w)
+                       return -ENOMEM;
+               w->func = fib6_dump_node;
+               cb->args[2] = (long)w;
++
++              /* 2. hook callback destructor.
++               */
++              cb->args[3] = (long)cb->done;
++              cb->done = fib6_dump_done;
++
+       }
+       arg.skb = skb;
diff --git a/queue-5.15/mlxbf_gige-stop-interface-during-shutdown.patch b/queue-5.15/mlxbf_gige-stop-interface-during-shutdown.patch
new file mode 100644 (file)
index 0000000..9ded6d7
--- /dev/null
@@ -0,0 +1,109 @@
+From 09ba28e1cd3cf715daab1fca6e1623e22fd754a6 Mon Sep 17 00:00:00 2001
+From: David Thompson <davthompson@nvidia.com>
+Date: Mon, 25 Mar 2024 17:09:29 -0400
+Subject: mlxbf_gige: stop interface during shutdown
+
+From: David Thompson <davthompson@nvidia.com>
+
+commit 09ba28e1cd3cf715daab1fca6e1623e22fd754a6 upstream.
+
+The mlxbf_gige driver intermittently encounters a NULL pointer
+exception while the system is shutting down via the "reboot" command.
+The mlxbf_gige driver will experience an exception right after executing
+its shutdown() method.  One example of this exception is:
+
+Unable to handle kernel NULL pointer dereference at virtual address 0000000000000070
+Mem abort info:
+  ESR = 0x0000000096000004
+  EC = 0x25: DABT (current EL), IL = 32 bits
+  SET = 0, FnV = 0
+  EA = 0, S1PTW = 0
+  FSC = 0x04: level 0 translation fault
+Data abort info:
+  ISV = 0, ISS = 0x00000004
+  CM = 0, WnR = 0
+user pgtable: 4k pages, 48-bit VAs, pgdp=000000011d373000
+[0000000000000070] pgd=0000000000000000, p4d=0000000000000000
+Internal error: Oops: 96000004 [#1] SMP
+CPU: 0 PID: 13 Comm: ksoftirqd/0 Tainted: G S         OE     5.15.0-bf.6.gef6992a #1
+Hardware name: https://www.mellanox.com BlueField SoC/BlueField SoC, BIOS 4.0.2.12669 Apr 21 2023
+pstate: 20400009 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+pc : mlxbf_gige_handle_tx_complete+0xc8/0x170 [mlxbf_gige]
+lr : mlxbf_gige_poll+0x54/0x160 [mlxbf_gige]
+sp : ffff8000080d3c10
+x29: ffff8000080d3c10 x28: ffffcce72cbb7000 x27: ffff8000080d3d58
+x26: ffff0000814e7340 x25: ffff331cd1a05000 x24: ffffcce72c4ea008
+x23: ffff0000814e4b40 x22: ffff0000814e4d10 x21: ffff0000814e4128
+x20: 0000000000000000 x19: ffff0000814e4a80 x18: ffffffffffffffff
+x17: 000000000000001c x16: ffffcce72b4553f4 x15: ffff80008805b8a7
+x14: 0000000000000000 x13: 0000000000000030 x12: 0101010101010101
+x11: 7f7f7f7f7f7f7f7f x10: c2ac898b17576267 x9 : ffffcce720fa5404
+x8 : ffff000080812138 x7 : 0000000000002e9a x6 : 0000000000000080
+x5 : ffff00008de3b000 x4 : 0000000000000000 x3 : 0000000000000001
+x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000
+Call trace:
+ mlxbf_gige_handle_tx_complete+0xc8/0x170 [mlxbf_gige]
+ mlxbf_gige_poll+0x54/0x160 [mlxbf_gige]
+ __napi_poll+0x40/0x1c8
+ net_rx_action+0x314/0x3a0
+ __do_softirq+0x128/0x334
+ run_ksoftirqd+0x54/0x6c
+ smpboot_thread_fn+0x14c/0x190
+ kthread+0x10c/0x110
+ ret_from_fork+0x10/0x20
+Code: 8b070000 f9000ea0 f95056c0 f86178a1 (b9407002)
+---[ end trace 7cc3941aa0d8e6a4 ]---
+Kernel panic - not syncing: Oops: Fatal exception in interrupt
+Kernel Offset: 0x4ce722520000 from 0xffff800008000000
+PHYS_OFFSET: 0x80000000
+CPU features: 0x000005c1,a3330e5a
+Memory Limit: none
+---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]---
+
+During system shutdown, the mlxbf_gige driver's shutdown() is always executed.
+However, the driver's stop() method will only execute if networking interface
+configuration logic within the Linux distribution has been setup to do so.
+
+If shutdown() executes but stop() does not execute, NAPI remains enabled
+and this can lead to an exception if NAPI is scheduled while the hardware
+interface has only been partially deinitialized.
+
+The networking interface managed by the mlxbf_gige driver must be properly
+stopped during system shutdown so that IFF_UP is cleared, the hardware
+interface is put into a clean state, and NAPI is fully deinitialized.
+
+Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver")
+Signed-off-by: David Thompson <davthompson@nvidia.com>
+Link: https://lore.kernel.org/r/20240325210929.25362-1-davthompson@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+@@ -14,6 +14,7 @@
+ #include <linux/module.h>
+ #include <linux/phy.h>
+ #include <linux/platform_device.h>
++#include <linux/rtnetlink.h>
+ #include <linux/skbuff.h>
+ #include "mlxbf_gige.h"
+@@ -419,8 +420,13 @@ static void mlxbf_gige_shutdown(struct p
+ {
+       struct mlxbf_gige *priv = platform_get_drvdata(pdev);
+-      writeq(0, priv->base + MLXBF_GIGE_INT_EN);
+-      mlxbf_gige_clean_port(priv);
++      rtnl_lock();
++      netif_device_detach(priv->netdev);
++
++      if (netif_running(priv->netdev))
++              dev_close(priv->netdev);
++
++      rtnl_unlock();
+ }
+ static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = {
diff --git a/queue-5.15/octeontx2-af-fix-issue-with-loading-coalesced-kpu-profiles.patch b/queue-5.15/octeontx2-af-fix-issue-with-loading-coalesced-kpu-profiles.patch
new file mode 100644 (file)
index 0000000..c2b3897
--- /dev/null
@@ -0,0 +1,36 @@
+From 0ba80d96585662299d4ea4624043759ce9015421 Mon Sep 17 00:00:00 2001
+From: Hariprasad Kelam <hkelam@marvell.com>
+Date: Tue, 26 Mar 2024 17:51:49 +0530
+Subject: octeontx2-af: Fix issue with loading coalesced KPU profiles
+
+From: Hariprasad Kelam <hkelam@marvell.com>
+
+commit 0ba80d96585662299d4ea4624043759ce9015421 upstream.
+
+The current implementation for loading coalesced KPU profiles has
+a limitation.  The "offset" field, which is used to locate profiles
+within the coalesced profile image, is restricted to a u16.
+
+This restricts the number of profiles that can be loaded. This patch
+addresses this limitation by increasing the size of the "offset" field.
+
+Fixes: 11c730bfbf5b ("octeontx2-af: support for coalescing KPU profiles")
+Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
+Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+@@ -1622,7 +1622,7 @@ static int npc_fwdb_detect_load_prfl_img
+       struct npc_coalesced_kpu_prfl *img_data = NULL;
+       int i = 0, rc = -EINVAL;
+       void __iomem *kpu_prfl_addr;
+-      u16 offset;
++      u32 offset;
+       img_data = (struct npc_coalesced_kpu_prfl __force *)rvu->kpu_prfl_addr;
+       if (le64_to_cpu(img_data->signature) == KPU_SIGN &&
diff --git a/queue-5.15/octeontx2-pf-check-negative-error-code-in-otx2_open.patch b/queue-5.15/octeontx2-pf-check-negative-error-code-in-otx2_open.patch
new file mode 100644 (file)
index 0000000..85b3324
--- /dev/null
@@ -0,0 +1,35 @@
+From e709acbd84fb6ef32736331b0147f027a3ef4c20 Mon Sep 17 00:00:00 2001
+From: Su Hui <suhui@nfschina.com>
+Date: Thu, 28 Mar 2024 10:06:21 +0800
+Subject: octeontx2-pf: check negative error code in otx2_open()
+
+From: Su Hui <suhui@nfschina.com>
+
+commit e709acbd84fb6ef32736331b0147f027a3ef4c20 upstream.
+
+otx2_rxtx_enable() returns a negative error code such as -EIO;
+check for -EIO rather than EIO to fix this problem.
+
+Fixes: c926252205c4 ("octeontx2-pf: Disable packet I/O for graceful exit")
+Signed-off-by: Su Hui <suhui@nfschina.com>
+Reviewed-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
+Link: https://lore.kernel.org/r/20240328020620.4054692-1-suhui@nfschina.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+@@ -1795,7 +1795,7 @@ int otx2_open(struct net_device *netdev)
+        * mcam entries are enabled to receive the packets. Hence disable the
+        * packet I/O.
+        */
+-      if (err == EIO)
++      if (err == -EIO)
+               goto err_disable_rxtx;
+       else if (err)
+               goto err_tx_stop_queues;
diff --git a/queue-5.15/selftests-reuseaddr_conflict-add-missing-new-line-at-the-end-of-the-output.patch b/queue-5.15/selftests-reuseaddr_conflict-add-missing-new-line-at-the-end-of-the-output.patch
new file mode 100644 (file)
index 0000000..dcdc3e9
--- /dev/null
@@ -0,0 +1,39 @@
+From 31974122cfdeaf56abc18d8ab740d580d9833e90 Mon Sep 17 00:00:00 2001
+From: Jakub Kicinski <kuba@kernel.org>
+Date: Fri, 29 Mar 2024 09:05:59 -0700
+Subject: selftests: reuseaddr_conflict: add missing new line at the end of the output
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+commit 31974122cfdeaf56abc18d8ab740d580d9833e90 upstream.
+
+The netdev CI runs in a VM and captures serial, so stdout and
+stderr get combined. Because there's a missing new line in
+stderr the test ends up corrupting KTAP:
+
+  # Successok 1 selftests: net: reuseaddr_conflict
+
+which should have been:
+
+  # Success
+  ok 1 selftests: net: reuseaddr_conflict
+
+Fixes: 422d8dc6fd3a ("selftest: add a reuseaddr test")
+Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Link: https://lore.kernel.org/r/20240329160559.249476-1-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/reuseaddr_conflict.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/net/reuseaddr_conflict.c
++++ b/tools/testing/selftests/net/reuseaddr_conflict.c
+@@ -109,6 +109,6 @@ int main(void)
+       fd1 = open_port(0, 1);
+       if (fd1 >= 0)
+               error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
+-      fprintf(stderr, "Success");
++      fprintf(stderr, "Success\n");
+       return 0;
+ }
index ce75a721c72c4a6a7bcd98a3772f7d442172f272..9bc8c4babb72991669a235eb03a63ef6edc15367 100644 (file)
@@ -642,3 +642,14 @@ bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch
 net-sched-act_skbmod-prevent-kernel-infoleak.patch
 net-stmmac-fix-rx-queue-priority-assignment.patch
 selftests-net-gro-fwd-update-vxlan-gro-test-expectations.patch
+erspan-make-sure-erspan_base_hdr-is-present-in-skb-head.patch
+selftests-reuseaddr_conflict-add-missing-new-line-at-the-end-of-the-output.patch
+ipv6-fix-infinite-recursion-in-fib6_dump_done.patch
+mlxbf_gige-stop-interface-during-shutdown.patch
+udp-do-not-accept-non-tunnel-gso-skbs-landing-in-a-tunnel.patch
+udp-do-not-transition-udp-gro-fraglist-partial-checksums-to-unnecessary.patch
+udp-prevent-local-udp-tunnel-packets-from-being-groed.patch
+octeontx2-af-fix-issue-with-loading-coalesced-kpu-profiles.patch
+octeontx2-pf-check-negative-error-code-in-otx2_open.patch
+i40e-fix-i40e_count_filters-to-count-only-active-new-filters.patch
+i40e-fix-vf-may-be-used-uninitialized-in-this-function-warning.patch
diff --git a/queue-5.15/udp-do-not-accept-non-tunnel-gso-skbs-landing-in-a-tunnel.patch b/queue-5.15/udp-do-not-accept-non-tunnel-gso-skbs-landing-in-a-tunnel.patch
new file mode 100644 (file)
index 0000000..e57a1eb
--- /dev/null
@@ -0,0 +1,146 @@
+From 3d010c8031e39f5fa1e8b13ada77e0321091011f Mon Sep 17 00:00:00 2001
+From: Antoine Tenart <atenart@kernel.org>
+Date: Tue, 26 Mar 2024 12:33:58 +0100
+Subject: udp: do not accept non-tunnel GSO skbs landing in a tunnel
+
+From: Antoine Tenart <atenart@kernel.org>
+
+commit 3d010c8031e39f5fa1e8b13ada77e0321091011f upstream.
+
+When rx-udp-gro-forwarding is enabled UDP packets might be GROed when
+being forwarded. If such packets might land in a tunnel this can cause
+various issues and udp_gro_receive makes sure this isn't the case by
+looking for a matching socket. This is performed in
+udp4/6_gro_lookup_skb but only in the current netns. This is an issue
+with tunneled packets when the endpoint is in another netns. In such
+cases the packets will be GROed at the UDP level, which leads to various
+issues later on. The same thing can happen with rx-gro-list.
+
+We saw this with geneve packets being GROed at the UDP level. In such
+case gso_size is set; later the packet goes through the geneve rx path,
+the geneve header is pulled, the offsets are adjusted and frag_list skbs
+are not adjusted with regard to geneve. When those skbs hit
+skb_fragment, it will misbehave. Different outcomes are possible
+depending on what the GROed skbs look like; from corrupted packets to
+kernel crashes.
+
+One example is a BUG_ON[1] triggered in skb_segment while processing the
+frag_list. Because gso_size is wrong (geneve header was pulled)
+skb_segment thinks there is "geneve header size" of data in frag_list,
+although it's in fact the next packet. The BUG_ON itself has nothing to
+do with the issue. This is only one of the potential issues.
+
+Looking up a matching socket in udp_gro_receive is fragile: the lookup
+could be extended to all netns (leaving aside the performance cost)
+but nothing prevents those packets from being modified in between and we
+could still not find a matching socket. It's OK to keep the current
+logic there as it should cover most cases but we also need to make sure
+we handle tunnel packets being GROed too early.
+
+This is done by extending the checks in udp_unexpected_gso: GSO packets
+lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits and landing in a tunnel must
+be segmented.
+
+[1] kernel BUG at net/core/skbuff.c:4408!
+    RIP: 0010:skb_segment+0xd2a/0xf70
+    __udp_gso_segment+0xaa/0x560
+
+Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.")
+Fixes: 36707061d6ba ("udp: allow forwarding of plain (non-fraglisted) UDP GRO packets")
+Signed-off-by: Antoine Tenart <atenart@kernel.org>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/udp.h    |   28 ++++++++++++++++++++++++++++
+ net/ipv4/udp.c         |    7 +++++++
+ net/ipv4/udp_offload.c |    6 ++++--
+ net/ipv6/udp.c         |    2 +-
+ 4 files changed, 40 insertions(+), 3 deletions(-)
+
+--- a/include/linux/udp.h
++++ b/include/linux/udp.h
+@@ -132,6 +132,24 @@ static inline void udp_cmsg_recv(struct
+       }
+ }
++DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
++#if IS_ENABLED(CONFIG_IPV6)
++DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
++#endif
++
++static inline bool udp_encap_needed(void)
++{
++      if (static_branch_unlikely(&udp_encap_needed_key))
++              return true;
++
++#if IS_ENABLED(CONFIG_IPV6)
++      if (static_branch_unlikely(&udpv6_encap_needed_key))
++              return true;
++#endif
++
++      return false;
++}
++
+ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
+ {
+       if (!skb_is_gso(skb))
+@@ -143,6 +161,16 @@ static inline bool udp_unexpected_gso(st
+       if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist)
+               return true;
++      /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still
++       * land in a tunnel as the socket check in udp_gro_receive cannot be
++       * foolproof.
++       */
++      if (udp_encap_needed() &&
++          READ_ONCE(udp_sk(sk)->encap_rcv) &&
++          !(skb_shinfo(skb)->gso_type &
++            (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)))
++              return true;
++
+       return false;
+ }
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -601,6 +601,13 @@ static inline bool __udp_is_mcast_sock(s
+ }
+ DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
++EXPORT_SYMBOL(udp_encap_needed_key);
++
++#if IS_ENABLED(CONFIG_IPV6)
++DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
++EXPORT_SYMBOL(udpv6_encap_needed_key);
++#endif
++
+ void udp_encap_enable(void)
+ {
+       static_branch_inc(&udp_encap_needed_key);
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -515,8 +515,10 @@ struct sk_buff *udp_gro_receive(struct l
+       unsigned int off = skb_gro_offset(skb);
+       int flush = 1;
+-      /* we can do L4 aggregation only if the packet can't land in a tunnel
+-       * otherwise we could corrupt the inner stream
++      /* We can do L4 aggregation only if the packet can't land in a tunnel
++       * otherwise we could corrupt the inner stream. Detecting such packets
++       * cannot be foolproof and the aggregation might still happen in some
++       * cases. Such packets should be caught in udp_unexpected_gso later.
+        */
+       NAPI_GRO_CB(skb)->is_flist = 0;
+       if (!sk || !udp_sk(sk)->gro_receive) {
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -473,7 +473,7 @@ csum_copy_err:
+       goto try_again;
+ }
+-DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
++DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+ void udpv6_encap_enable(void)
+ {
+       static_branch_inc(&udpv6_encap_needed_key);
diff --git a/queue-5.15/udp-do-not-transition-udp-gro-fraglist-partial-checksums-to-unnecessary.patch b/queue-5.15/udp-do-not-transition-udp-gro-fraglist-partial-checksums-to-unnecessary.patch
new file mode 100644 (file)
index 0000000..e0b7029
--- /dev/null
@@ -0,0 +1,74 @@
+From f0b8c30345565344df2e33a8417a27503589247d Mon Sep 17 00:00:00 2001
+From: Antoine Tenart <atenart@kernel.org>
+Date: Tue, 26 Mar 2024 12:34:00 +0100
+Subject: udp: do not transition UDP GRO fraglist partial checksums to unnecessary
+
+From: Antoine Tenart <atenart@kernel.org>
+
+commit f0b8c30345565344df2e33a8417a27503589247d upstream.
+
+UDP GRO validates checksums and in udp4/6_gro_complete fraglist packets
+are converted to CHECKSUM_UNNECESSARY to avoid later checks. However
+this is an issue for CHECKSUM_PARTIAL packets as they can be looped in
+an egress path and then their partial checksums are not fixed.
+
+Different issues can be observed, from invalid checksum on packets to
+traces like:
+
+  gen01: hw csum failure
+  skb len=3008 headroom=160 headlen=1376 tailroom=0
+  mac=(106,14) net=(120,40) trans=160
+  shinfo(txflags=0 nr_frags=0 gso(size=0 type=0 segs=0))
+  csum(0xffff232e ip_summed=2 complete_sw=0 valid=0 level=0)
+  hash(0x77e3d716 sw=1 l4=1) proto=0x86dd pkttype=0 iif=12
+  ...
+
+Fix this by only converting CHECKSUM_NONE packets to
+CHECKSUM_UNNECESSARY by reusing __skb_incr_checksum_unnecessary. All
+other checksum types are kept as-is, including CHECKSUM_COMPLETE as
+fraglist packets being segmented back would have their skb->csum valid.
+
+Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.")
+Signed-off-by: Antoine Tenart <atenart@kernel.org>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp_offload.c |    8 +-------
+ net/ipv6/udp_offload.c |    8 +-------
+ 2 files changed, 2 insertions(+), 14 deletions(-)
+
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -685,13 +685,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_com
+               skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
+               skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+-              if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+-                      if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
+-                              skb->csum_level++;
+-              } else {
+-                      skb->ip_summed = CHECKSUM_UNNECESSARY;
+-                      skb->csum_level = 0;
+-              }
++              __skb_incr_checksum_unnecessary(skb);
+               return 0;
+       }
+--- a/net/ipv6/udp_offload.c
++++ b/net/ipv6/udp_offload.c
+@@ -170,13 +170,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_com
+               skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
+               skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+-              if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+-                      if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
+-                              skb->csum_level++;
+-              } else {
+-                      skb->ip_summed = CHECKSUM_UNNECESSARY;
+-                      skb->csum_level = 0;
+-              }
++              __skb_incr_checksum_unnecessary(skb);
+               return 0;
+       }
diff --git a/queue-5.15/udp-prevent-local-udp-tunnel-packets-from-being-groed.patch b/queue-5.15/udp-prevent-local-udp-tunnel-packets-from-being-groed.patch
new file mode 100644 (file)
index 0000000..167a1be
--- /dev/null
@@ -0,0 +1,54 @@
+From 64235eabc4b5b18c507c08a1f16cdac6c5661220 Mon Sep 17 00:00:00 2001
+From: Antoine Tenart <atenart@kernel.org>
+Date: Tue, 26 Mar 2024 12:34:01 +0100
+Subject: udp: prevent local UDP tunnel packets from being GROed
+
+From: Antoine Tenart <atenart@kernel.org>
+
+commit 64235eabc4b5b18c507c08a1f16cdac6c5661220 upstream.
+
+GRO has a fundamental issue with UDP tunnel packets as it can't detect
+those in a foolproof way and GRO could happen before they reach the
+tunnel endpoint. Previous commits have fixed issues when UDP tunnel
+packets come from a remote host, but if those packets are issued locally
+they could run into checksum issues.
+
+If the inner packet has a partial checksum the information will be lost
+in the GRO logic, either in udp4/6_gro_complete or in
+udp_gro_complete_segment and packets will have an invalid checksum when
+leaving the host.
+
+Prevent local UDP tunnel packets from ever being GROed at the outer UDP
+level.
+
+Due to skb->encapsulation being wrongly used in some drivers this is
+actually only preventing UDP tunnel packets with a partial checksum from
+being GROed (see iptunnel_handle_offloads) but those were also the packets
+triggering issues so in practice this should be sufficient.
+
+Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.")
+Fixes: 36707061d6ba ("udp: allow forwarding of plain (non-fraglisted) UDP GRO packets")
+Suggested-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Antoine Tenart <atenart@kernel.org>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp_offload.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -522,6 +522,12 @@ struct sk_buff *udp_gro_receive(struct l
+        */
+       NAPI_GRO_CB(skb)->is_flist = 0;
+       if (!sk || !udp_sk(sk)->gro_receive) {
++              /* If the packet was locally encapsulated in a UDP tunnel that
++               * wasn't detected above, do not GRO.
++               */
++              if (skb->encapsulation)
++                      goto out;
++
+               if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
+                       NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled : 1;