Fixes for 6.6
author     Sasha Levin <sashal@kernel.org>
           Sat, 18 Jan 2025 17:23:27 +0000 (12:23 -0500)
committer  Sasha Levin <sashal@kernel.org>
           Sat, 18 Jan 2025 17:23:27 +0000 (12:23 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
19 files changed:
queue-6.6/bpf-fix-bpf_sk_select_reuseport-memory-leak.patch [new file with mode: 0644]
queue-6.6/drm-v3d-ensure-job-pointer-is-set-to-null-after-job-.patch [new file with mode: 0644]
queue-6.6/drm-vmwgfx-add-new-keep_resv-bo-param.patch [new file with mode: 0644]
queue-6.6/gtp-destroy-device-along-with-udp-socket-s-netns-dis.patch [new file with mode: 0644]
queue-6.6/gtp-use-exit_batch_rtnl-method.patch [new file with mode: 0644]
queue-6.6/gtp-use-for_each_netdev_rcu-in-gtp_genl_dump_pdp.patch [new file with mode: 0644]
queue-6.6/net-add-exit_batch_rtnl-method.patch [new file with mode: 0644]
queue-6.6/net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field.patch [new file with mode: 0644]
queue-6.6/net-fec-handle-page_pool_dev_alloc_pages-error.patch [new file with mode: 0644]
queue-6.6/net-mlx5-clear-port-select-structure-when-fail-to-cr.patch [new file with mode: 0644]
queue-6.6/net-mlx5-fix-rdma-tx-steering-prio.patch [new file with mode: 0644]
queue-6.6/net-mlx5e-always-start-ipsec-sequence-number-from-1.patch [new file with mode: 0644]
queue-6.6/net-mlx5e-fix-inversion-dependency-warning-while-ena.patch [new file with mode: 0644]
queue-6.6/net-mlx5e-rely-on-reqid-in-ipsec-tunnel-mode.patch [new file with mode: 0644]
queue-6.6/net-xilinx-axienet-fix-irq-coalescing-packet-count-o.patch [new file with mode: 0644]
queue-6.6/nfp-bpf-prevent-integer-overflow-in-nfp_bpf_event_ou.patch [new file with mode: 0644]
queue-6.6/openvswitch-fix-lockup-on-tx-to-unregistering-netdev.patch [new file with mode: 0644]
queue-6.6/pktgen-avoid-out-of-bounds-access-in-get_imix_entrie.patch [new file with mode: 0644]
queue-6.6/series [new file with mode: 0644]

diff --git a/queue-6.6/bpf-fix-bpf_sk_select_reuseport-memory-leak.patch b/queue-6.6/bpf-fix-bpf_sk_select_reuseport-memory-leak.patch
new file mode 100644 (file)
index 0000000..49cb87c
--- /dev/null
@@ -0,0 +1,112 @@
+From 963c508991eb81bf2e29af5dd3f35639ef0ab4f2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Jan 2025 14:21:55 +0100
+Subject: bpf: Fix bpf_sk_select_reuseport() memory leak
+
+From: Michal Luczaj <mhal@rbox.co>
+
+[ Upstream commit b3af60928ab9129befa65e6df0310d27300942bf ]
+
+As pointed out in the original comment, lookup in sockmap can return a TCP
+ESTABLISHED socket. Such a TCP socket may have had SO_ATTACH_REUSEPORT_EBPF
+set before it became ESTABLISHED. In other words, a non-NULL sk_reuseport_cb
+does not imply a non-refcounted socket.
+
+Drop sk's reference in both error paths.
+
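+A minimal sketch of the leaking pattern (condensed from the diff below;
+the identifiers are the ones used in net/core/filter.c):
+
+      /* the sockmap lookup may take a reference on an ESTABLISHED socket */
+      selected_sk = map->ops->map_lookup_elem(map, key);
+      reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
+      if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id))
+              return -EBADFD; /* old code: the reference is never dropped */
+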
+unreferenced object 0xffff888101911800 (size 2048):
+  comm "test_progs", pid 44109, jiffies 4297131437
+  hex dump (first 32 bytes):
+    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+    80 00 01 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+  backtrace (crc 9336483b):
+    __kmalloc_noprof+0x3bf/0x560
+    __reuseport_alloc+0x1d/0x40
+    reuseport_alloc+0xca/0x150
+    reuseport_attach_prog+0x87/0x140
+    sk_reuseport_attach_bpf+0xc8/0x100
+    sk_setsockopt+0x1181/0x1990
+    do_sock_setsockopt+0x12b/0x160
+    __sys_setsockopt+0x7b/0xc0
+    __x64_sys_setsockopt+0x1b/0x30
+    do_syscall_64+0x93/0x180
+    entry_SYSCALL_64_after_hwframe+0x76/0x7e
+
+Fixes: 64d85290d79c ("bpf: Allow bpf_map_lookup_elem for SOCKMAP and SOCKHASH")
+Signed-off-by: Michal Luczaj <mhal@rbox.co>
+Reviewed-by: Martin KaFai Lau <martin.lau@kernel.org>
+Link: https://patch.msgid.link/20250110-reuseport-memleak-v1-1-fa1ddab0adfe@rbox.co
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/filter.c | 30 ++++++++++++++++++------------
+ 1 file changed, 18 insertions(+), 12 deletions(-)
+
+diff --git a/net/core/filter.c b/net/core/filter.c
+index 34320ce70096a..5881944f1681c 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -11190,6 +11190,7 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
+       bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
+       struct sock_reuseport *reuse;
+       struct sock *selected_sk;
++      int err;
+       selected_sk = map->ops->map_lookup_elem(map, key);
+       if (!selected_sk)
+@@ -11197,10 +11198,6 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
+       reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
+       if (!reuse) {
+-              /* Lookup in sock_map can return TCP ESTABLISHED sockets. */
+-              if (sk_is_refcounted(selected_sk))
+-                      sock_put(selected_sk);
+-
+               /* reuseport_array has only sk with non NULL sk_reuseport_cb.
+                * The only (!reuse) case here is - the sk has already been
+                * unhashed (e.g. by close()), so treat it as -ENOENT.
+@@ -11208,24 +11205,33 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
+                * Other maps (e.g. sock_map) do not provide this guarantee and
+                * the sk may never be in the reuseport group to begin with.
+                */
+-              return is_sockarray ? -ENOENT : -EINVAL;
++              err = is_sockarray ? -ENOENT : -EINVAL;
++              goto error;
+       }
+       if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
+               struct sock *sk = reuse_kern->sk;
+-              if (sk->sk_protocol != selected_sk->sk_protocol)
+-                      return -EPROTOTYPE;
+-              else if (sk->sk_family != selected_sk->sk_family)
+-                      return -EAFNOSUPPORT;
+-
+-              /* Catch all. Likely bound to a different sockaddr. */
+-              return -EBADFD;
++              if (sk->sk_protocol != selected_sk->sk_protocol) {
++                      err = -EPROTOTYPE;
++              } else if (sk->sk_family != selected_sk->sk_family) {
++                      err = -EAFNOSUPPORT;
++              } else {
++                      /* Catch all. Likely bound to a different sockaddr. */
++                      err = -EBADFD;
++              }
++              goto error;
+       }
+       reuse_kern->selected_sk = selected_sk;
+       return 0;
++error:
++      /* Lookup in sock_map can return TCP ESTABLISHED sockets. */
++      if (sk_is_refcounted(selected_sk))
++              sock_put(selected_sk);
++
++      return err;
+ }
+ static const struct bpf_func_proto sk_select_reuseport_proto = {
+-- 
+2.39.5
+
diff --git a/queue-6.6/drm-v3d-ensure-job-pointer-is-set-to-null-after-job-.patch b/queue-6.6/drm-v3d-ensure-job-pointer-is-set-to-null-after-job-.patch
new file mode 100644 (file)
index 0000000..85a22f9
--- /dev/null
@@ -0,0 +1,66 @@
+From b4769dea48b847960e103cabb35efb7ca96768ce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Jan 2025 12:47:40 -0300
+Subject: drm/v3d: Ensure job pointer is set to NULL after job completion
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maíra Canal <mcanal@igalia.com>
+
+[ Upstream commit e4b5ccd392b92300a2b341705cc4805681094e49 ]
+
+After a job completes, the corresponding pointer in the device must
+be set to NULL. Failing to do so triggers a warning when unloading
+the driver, as it appears the job is still active. To prevent this,
+assign the job pointer to NULL after completing the job, indicating
+the job has finished.
+
+Fixes: 14d1d1908696 ("drm/v3d: Remove the bad signaled() implementation.")
+Signed-off-by: Maíra Canal <mcanal@igalia.com>
+Reviewed-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20250113154741.67520-1-mcanal@igalia.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/v3d/v3d_irq.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
+index e714d5318f309..76806039691a2 100644
+--- a/drivers/gpu/drm/v3d/v3d_irq.c
++++ b/drivers/gpu/drm/v3d/v3d_irq.c
+@@ -103,6 +103,7 @@ v3d_irq(int irq, void *arg)
+               trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
+               dma_fence_signal(&fence->base);
++              v3d->bin_job = NULL;
+               status = IRQ_HANDLED;
+       }
+@@ -112,6 +113,7 @@ v3d_irq(int irq, void *arg)
+               trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
+               dma_fence_signal(&fence->base);
++              v3d->render_job = NULL;
+               status = IRQ_HANDLED;
+       }
+@@ -121,6 +123,7 @@ v3d_irq(int irq, void *arg)
+               trace_v3d_csd_irq(&v3d->drm, fence->seqno);
+               dma_fence_signal(&fence->base);
++              v3d->csd_job = NULL;
+               status = IRQ_HANDLED;
+       }
+@@ -157,6 +160,7 @@ v3d_hub_irq(int irq, void *arg)
+               trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
+               dma_fence_signal(&fence->base);
++              v3d->tfu_job = NULL;
+               status = IRQ_HANDLED;
+       }
+-- 
+2.39.5
+
diff --git a/queue-6.6/drm-vmwgfx-add-new-keep_resv-bo-param.patch b/queue-6.6/drm-vmwgfx-add-new-keep_resv-bo-param.patch
new file mode 100644 (file)
index 0000000..23ac873
--- /dev/null
@@ -0,0 +1,147 @@
+From 34db1008ec0fdf5b434f6643e2a9cf488a53ec75 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Jan 2025 12:53:35 -0600
+Subject: drm/vmwgfx: Add new keep_resv BO param
+
+From: Ian Forbes <ian.forbes@broadcom.com>
+
+[ Upstream commit b7d40627813799870e72729c6fc979a8a40d9ba6 ]
+
+Add a new BO param that keeps the reservation locked after creation.
+This removes the need to re-reserve the BO after creation, which is a
+waste of cycles.
+
+This also fixes a bug in vmw_prime_import_sg_table where the imported
+reservation is unlocked twice.
+
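+A typical caller then looks roughly like this (sketch based on the
+vmw_dummy_query_bo_create() hunk below):
+
+      struct vmw_bo_params bo_params = {
+              .domain = VMW_BO_DOMAIN_SYS,
+              .busy_domain = VMW_BO_DOMAIN_SYS,
+              .bo_type = ttm_bo_type_kernel,
+              .size = PAGE_SIZE,
+              .pin = true,
+              .keep_resv = true,  /* BO is returned still reserved */
+      };
+
+      ret = vmw_bo_create(dev_priv, &bo_params, &vbo);
+      /* no extra ttm_bo_reserve() is needed before ttm_bo_kmap() */
+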
+Signed-off-by: Ian Forbes <ian.forbes@broadcom.com>
+Fixes: b32233acceff ("drm/vmwgfx: Fix prime import/export")
+Reviewed-by: Zack Rusin <zack.rusin@broadcom.com>
+Signed-off-by: Zack Rusin <zack.rusin@broadcom.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20250110185335.15301-1-ian.forbes@broadcom.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/vmwgfx/vmwgfx_bo.c         | 3 ++-
+ drivers/gpu/drm/vmwgfx/vmwgfx_bo.h         | 3 ++-
+ drivers/gpu/drm/vmwgfx/vmwgfx_drv.c        | 7 ++-----
+ drivers/gpu/drm/vmwgfx/vmwgfx_gem.c        | 1 +
+ drivers/gpu/drm/vmwgfx/vmwgfx_shader.c     | 7 ++-----
+ drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 5 ++---
+ 6 files changed, 11 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+index fdc34283eeb97..ec6ca264ce11f 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+@@ -412,7 +412,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv,
+       if (params->pin)
+               ttm_bo_pin(&vmw_bo->tbo);
+-      ttm_bo_unreserve(&vmw_bo->tbo);
++      if (!params->keep_resv)
++              ttm_bo_unreserve(&vmw_bo->tbo);
+       return 0;
+ }
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
+index 156ea612fc2a4..a3ac61b991bf6 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
+@@ -53,8 +53,9 @@ struct vmw_bo_params {
+       u32 domain;
+       u32 busy_domain;
+       enum ttm_bo_type bo_type;
+-      size_t size;
+       bool pin;
++      bool keep_resv;
++      size_t size;
+       struct dma_resv *resv;
+       struct sg_table *sg;
+ };
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+index bea576434e475..4655c266924fe 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+@@ -399,7 +399,8 @@ static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv)
+               .busy_domain = VMW_BO_DOMAIN_SYS,
+               .bo_type = ttm_bo_type_kernel,
+               .size = PAGE_SIZE,
+-              .pin = true
++              .pin = true,
++              .keep_resv = true,
+       };
+       /*
+@@ -411,10 +412,6 @@ static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv)
+       if (unlikely(ret != 0))
+               return ret;
+-      ret = ttm_bo_reserve(&vbo->tbo, false, true, NULL);
+-      BUG_ON(ret != 0);
+-      vmw_bo_pin_reserved(vbo, true);
+-
+       ret = ttm_bo_kmap(&vbo->tbo, 0, 1, &map);
+       if (likely(ret == 0)) {
+               result = ttm_kmap_obj_virtual(&map, &dummy);
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
+index d6bcaf078b1f4..0dc3dacc5beee 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
+@@ -163,6 +163,7 @@ struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev,
+               .bo_type = ttm_bo_type_sg,
+               .size = attach->dmabuf->size,
+               .pin = false,
++              .keep_resv = true,
+               .resv = attach->dmabuf->resv,
+               .sg = table,
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
+index a01ca3226d0af..7fb1c88bcc475 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
+@@ -896,7 +896,8 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv,
+               .busy_domain = VMW_BO_DOMAIN_SYS,
+               .bo_type = ttm_bo_type_device,
+               .size = size,
+-              .pin = true
++              .pin = true,
++              .keep_resv = true,
+       };
+       if (!vmw_shader_id_ok(user_key, shader_type))
+@@ -906,10 +907,6 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv,
+       if (unlikely(ret != 0))
+               goto out;
+-      ret = ttm_bo_reserve(&buf->tbo, false, true, NULL);
+-      if (unlikely(ret != 0))
+-              goto no_reserve;
+-
+       /* Map and copy shader bytecode. */
+       ret = ttm_bo_kmap(&buf->tbo, 0, PFN_UP(size), &map);
+       if (unlikely(ret != 0)) {
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+index fcb87d83760ef..75cf9e76df2ed 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+@@ -604,15 +604,14 @@ int vmw_bo_create_and_populate(struct vmw_private *dev_priv,
+               .busy_domain = domain,
+               .bo_type = ttm_bo_type_kernel,
+               .size = bo_size,
+-              .pin = true
++              .pin = true,
++              .keep_resv = true,
+       };
+       ret = vmw_bo_create(dev_priv, &bo_params, &vbo);
+       if (unlikely(ret != 0))
+               return ret;
+-      ret = ttm_bo_reserve(&vbo->tbo, false, true, NULL);
+-      BUG_ON(ret != 0);
+       ret = vmw_ttm_populate(vbo->tbo.bdev, vbo->tbo.ttm, &ctx);
+       if (likely(ret == 0)) {
+               struct vmw_ttm_tt *vmw_tt =
+-- 
+2.39.5
+
diff --git a/queue-6.6/gtp-destroy-device-along-with-udp-socket-s-netns-dis.patch b/queue-6.6/gtp-destroy-device-along-with-udp-socket-s-netns-dis.patch
new file mode 100644 (file)
index 0000000..2b3a944
--- /dev/null
@@ -0,0 +1,125 @@
+From cadc93de3d1ea0c794df138b903151a27ecfa791 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Jan 2025 10:47:53 +0900
+Subject: gtp: Destroy device along with udp socket's netns dismantle.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit eb28fd76c0a08a47b470677c6cef9dd1c60e92d1 ]
+
+gtp_newlink() links the device to a list in dev_net(dev) instead of
+src_net, where a udp tunnel socket is created.
+
+Even when src_net is removed, the device stays alive on dev_net(dev).
+Then, removing src_net triggers the splat below. [0]
+
+In this example, gtp0 is created in ns2, and the udp socket is created
+in ns1.
+
+  ip netns add ns1
+  ip netns add ns2
+  ip -n ns1 link add netns ns2 name gtp0 type gtp role sgsn
+  ip netns del ns1
+
+Let's link the device to the socket's netns instead.
+
+Now, gtp_net_exit_batch_rtnl() needs another netdev iteration to remove
+all gtp devices in the netns.
+
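+Roughly, the exit path then becomes (see the diff below):
+
+      list_for_each_entry(net, net_list, exit_list) {
+              /* gtp devices living in this netns but linked to another
+               * netns' gtp_dev_list
+               */
+              for_each_netdev(net, dev)
+                      if (dev->rtnl_link_ops == &gtp_link_ops)
+                              gtp_dellink(dev, dev_to_kill);
+
+              /* devices linked to this netns' gtp_dev_list */
+              list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list)
+                      gtp_dellink(gtp->dev, dev_to_kill);
+      }
+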
+[0]:
+ref_tracker: net notrefcnt@000000003d6e7d05 has 1/2 users at
+     sk_alloc (./include/net/net_namespace.h:345 net/core/sock.c:2236)
+     inet_create (net/ipv4/af_inet.c:326 net/ipv4/af_inet.c:252)
+     __sock_create (net/socket.c:1558)
+     udp_sock_create4 (net/ipv4/udp_tunnel_core.c:18)
+     gtp_create_sock (./include/net/udp_tunnel.h:59 drivers/net/gtp.c:1423)
+     gtp_create_sockets (drivers/net/gtp.c:1447)
+     gtp_newlink (drivers/net/gtp.c:1507)
+     rtnl_newlink (net/core/rtnetlink.c:3786 net/core/rtnetlink.c:3897 net/core/rtnetlink.c:4012)
+     rtnetlink_rcv_msg (net/core/rtnetlink.c:6922)
+     netlink_rcv_skb (net/netlink/af_netlink.c:2542)
+     netlink_unicast (net/netlink/af_netlink.c:1321 net/netlink/af_netlink.c:1347)
+     netlink_sendmsg (net/netlink/af_netlink.c:1891)
+     ____sys_sendmsg (net/socket.c:711 net/socket.c:726 net/socket.c:2583)
+     ___sys_sendmsg (net/socket.c:2639)
+     __sys_sendmsg (net/socket.c:2669)
+     do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83)
+
+WARNING: CPU: 1 PID: 60 at lib/ref_tracker.c:179 ref_tracker_dir_exit (lib/ref_tracker.c:179)
+Modules linked in:
+CPU: 1 UID: 0 PID: 60 Comm: kworker/u16:2 Not tainted 6.13.0-rc5-00147-g4c1224501e9d #5
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+Workqueue: netns cleanup_net
+RIP: 0010:ref_tracker_dir_exit (lib/ref_tracker.c:179)
+Code: 00 00 00 fc ff df 4d 8b 26 49 bd 00 01 00 00 00 00 ad de 4c 39 f5 0f 85 df 00 00 00 48 8b 74 24 08 48 89 df e8 a5 cc 12 02 90 <0f> 0b 90 48 8d 6b 44 be 04 00 00 00 48 89 ef e8 80 de 67 ff 48 89
+RSP: 0018:ff11000009a07b60 EFLAGS: 00010286
+RAX: 0000000000002bd3 RBX: ff1100000f4e1aa0 RCX: 1ffffffff0e40ac6
+RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff8423ee3c
+RBP: ff1100000f4e1af0 R08: 0000000000000001 R09: fffffbfff0e395ae
+R10: 0000000000000001 R11: 0000000000036001 R12: ff1100000f4e1af0
+R13: dead000000000100 R14: ff1100000f4e1af0 R15: dffffc0000000000
+FS:  0000000000000000(0000) GS:ff1100006ce80000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f9b2464bd98 CR3: 0000000005286005 CR4: 0000000000771ef0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400
+PKRU: 55555554
+Call Trace:
+ <TASK>
+ ? __warn (kernel/panic.c:748)
+ ? ref_tracker_dir_exit (lib/ref_tracker.c:179)
+ ? report_bug (lib/bug.c:201 lib/bug.c:219)
+ ? handle_bug (arch/x86/kernel/traps.c:285)
+ ? exc_invalid_op (arch/x86/kernel/traps.c:309 (discriminator 1))
+ ? asm_exc_invalid_op (./arch/x86/include/asm/idtentry.h:621)
+ ? _raw_spin_unlock_irqrestore (./arch/x86/include/asm/irqflags.h:42 ./arch/x86/include/asm/irqflags.h:97 ./arch/x86/include/asm/irqflags.h:155 ./include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194)
+ ? ref_tracker_dir_exit (lib/ref_tracker.c:179)
+ ? __pfx_ref_tracker_dir_exit (lib/ref_tracker.c:158)
+ ? kfree (mm/slub.c:4613 mm/slub.c:4761)
+ net_free (net/core/net_namespace.c:476 net/core/net_namespace.c:467)
+ cleanup_net (net/core/net_namespace.c:664 (discriminator 3))
+ process_one_work (kernel/workqueue.c:3229)
+ worker_thread (kernel/workqueue.c:3304 kernel/workqueue.c:3391)
+ kthread (kernel/kthread.c:389)
+ ret_from_fork (arch/x86/kernel/process.c:147)
+ ret_from_fork_asm (arch/x86/entry/entry_64.S:257)
+ </TASK>
+
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Reported-by: Xiao Liang <shaw.leon@gmail.com>
+Closes: https://lore.kernel.org/netdev/20250104125732.17335-1-shaw.leon@gmail.com/
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/gtp.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
+index 69b89483f1b50..47238c3ec82e7 100644
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -1095,7 +1095,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
+               goto out_encap;
+       }
+-      gn = net_generic(dev_net(dev), gtp_net_id);
++      gn = net_generic(src_net, gtp_net_id);
+       list_add(&gtp->list, &gn->gtp_dev_list);
+       dev->priv_destructor = gtp_destructor;
+@@ -1895,6 +1895,11 @@ static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list,
+       list_for_each_entry(net, net_list, exit_list) {
+               struct gtp_net *gn = net_generic(net, gtp_net_id);
+               struct gtp_dev *gtp, *gtp_next;
++              struct net_device *dev;
++
++              for_each_netdev(net, dev)
++                      if (dev->rtnl_link_ops == &gtp_link_ops)
++                              gtp_dellink(dev, dev_to_kill);
+               list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list)
+                       gtp_dellink(gtp->dev, dev_to_kill);
+-- 
+2.39.5
+
diff --git a/queue-6.6/gtp-use-exit_batch_rtnl-method.patch b/queue-6.6/gtp-use-exit_batch_rtnl-method.patch
new file mode 100644 (file)
index 0000000..5c6839f
--- /dev/null
@@ -0,0 +1,66 @@
+From 04324d54e3404c359540b2bb9a424016ee1e15b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Feb 2024 14:43:03 +0000
+Subject: gtp: use exit_batch_rtnl() method
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 6eedda01b2bfdcf427b37759e053dc27232f3af1 ]
+
+exit_batch_rtnl() is called while RTNL is held,
+and devices to be unregistered can be queued in the dev_kill_list.
+
+This saves one rtnl_lock()/rtnl_unlock() pair per netns
+and one unregister_netdevice_many() call per netns.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Antoine Tenart <atenart@kernel.org>
+Link: https://lore.kernel.org/r/20240206144313.2050392-8-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 46841c7053e6 ("gtp: Use for_each_netdev_rcu() in gtp_genl_dump_pdp().")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/gtp.c | 20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
+index 9dd8f66610ce6..60c950066ec5b 100644
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -1884,23 +1884,23 @@ static int __net_init gtp_net_init(struct net *net)
+       return 0;
+ }
+-static void __net_exit gtp_net_exit(struct net *net)
++static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list,
++                                             struct list_head *dev_to_kill)
+ {
+-      struct gtp_net *gn = net_generic(net, gtp_net_id);
+-      struct gtp_dev *gtp;
+-      LIST_HEAD(list);
++      struct net *net;
+-      rtnl_lock();
+-      list_for_each_entry(gtp, &gn->gtp_dev_list, list)
+-              gtp_dellink(gtp->dev, &list);
++      list_for_each_entry(net, net_list, exit_list) {
++              struct gtp_net *gn = net_generic(net, gtp_net_id);
++              struct gtp_dev *gtp;
+-      unregister_netdevice_many(&list);
+-      rtnl_unlock();
++              list_for_each_entry(gtp, &gn->gtp_dev_list, list)
++                      gtp_dellink(gtp->dev, dev_to_kill);
++      }
+ }
+ static struct pernet_operations gtp_net_ops = {
+       .init   = gtp_net_init,
+-      .exit   = gtp_net_exit,
++      .exit_batch_rtnl = gtp_net_exit_batch_rtnl,
+       .id     = &gtp_net_id,
+       .size   = sizeof(struct gtp_net),
+ };
+-- 
+2.39.5
+
diff --git a/queue-6.6/gtp-use-for_each_netdev_rcu-in-gtp_genl_dump_pdp.patch b/queue-6.6/gtp-use-for_each_netdev_rcu-in-gtp_genl_dump_pdp.patch
new file mode 100644 (file)
index 0000000..20f25ea
--- /dev/null
@@ -0,0 +1,91 @@
+From 889fa0cf5275c83a676d1f8b8d9b5306b1afcb3f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Jan 2025 10:47:52 +0900
+Subject: gtp: Use for_each_netdev_rcu() in gtp_genl_dump_pdp().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 46841c7053e6d25fb33e0534ef023833bf03e382 ]
+
+gtp_newlink() links the gtp device to a list in dev_net(dev).
+
+However, even after the gtp device is moved to another netns,
+it stays on the list but should be invisible.
+
+Let's use for_each_netdev_rcu() for netdev traversal in
+gtp_genl_dump_pdp().
+
+Note that gtp_dev_list is no longer used under RCU, so list
+helpers are converted to the non-RCU variant.
+
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Reported-by: Xiao Liang <shaw.leon@gmail.com>
+Closes: https://lore.kernel.org/netdev/CABAhCOQdBL6h9M2C+kd+bGivRJ9Q72JUxW+-gur0nub_=PmFPA@mail.gmail.com/
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/gtp.c | 19 +++++++++++--------
+ 1 file changed, 11 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
+index 60c950066ec5b..69b89483f1b50 100644
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -1096,7 +1096,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
+       }
+       gn = net_generic(dev_net(dev), gtp_net_id);
+-      list_add_rcu(&gtp->list, &gn->gtp_dev_list);
++      list_add(&gtp->list, &gn->gtp_dev_list);
+       dev->priv_destructor = gtp_destructor;
+       netdev_dbg(dev, "registered new GTP interface\n");
+@@ -1122,7 +1122,7 @@ static void gtp_dellink(struct net_device *dev, struct list_head *head)
+               hlist_for_each_entry_safe(pctx, next, &gtp->tid_hash[i], hlist_tid)
+                       pdp_context_delete(pctx);
+-      list_del_rcu(&gtp->list);
++      list_del(&gtp->list);
+       unregister_netdevice_queue(dev, head);
+ }
+@@ -1690,16 +1690,19 @@ static int gtp_genl_dump_pdp(struct sk_buff *skb,
+       struct gtp_dev *last_gtp = (struct gtp_dev *)cb->args[2], *gtp;
+       int i, j, bucket = cb->args[0], skip = cb->args[1];
+       struct net *net = sock_net(skb->sk);
++      struct net_device *dev;
+       struct pdp_ctx *pctx;
+-      struct gtp_net *gn;
+-
+-      gn = net_generic(net, gtp_net_id);
+       if (cb->args[4])
+               return 0;
+       rcu_read_lock();
+-      list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) {
++      for_each_netdev_rcu(net, dev) {
++              if (dev->rtnl_link_ops != &gtp_link_ops)
++                      continue;
++
++              gtp = netdev_priv(dev);
++
+               if (last_gtp && last_gtp != gtp)
+                       continue;
+               else
+@@ -1891,9 +1894,9 @@ static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list,
+       list_for_each_entry(net, net_list, exit_list) {
+               struct gtp_net *gn = net_generic(net, gtp_net_id);
+-              struct gtp_dev *gtp;
++              struct gtp_dev *gtp, *gtp_next;
+-              list_for_each_entry(gtp, &gn->gtp_dev_list, list)
++              list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list)
+                       gtp_dellink(gtp->dev, dev_to_kill);
+       }
+ }
+-- 
+2.39.5
+
diff --git a/queue-6.6/net-add-exit_batch_rtnl-method.patch b/queue-6.6/net-add-exit_batch_rtnl-method.patch
new file mode 100644 (file)
index 0000000..f9a605f
--- /dev/null
@@ -0,0 +1,128 @@
+From fde7ab53d3e6e261b77704d4be0d4f8dd0c034a0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Feb 2024 14:42:57 +0000
+Subject: net: add exit_batch_rtnl() method
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit fd4f101edbd9f99567ab2adb1f2169579ede7c13 ]
+
+Many (struct pernet_operations)->exit_batch() methods have
+to acquire rtnl.
+
+In presence of rtnl mutex pressure, this makes cleanup_net()
+very slow.
+
+This patch adds a new exit_batch_rtnl() method to reduce
+number of rtnl acquisitions from cleanup_net().
+
+exit_batch_rtnl() handlers are called while rtnl is locked,
+and devices to be killed can be queued in a list provided
+as their second argument.
+
+A single unregister_netdevice_many() is called right
+before rtnl is released.
+
+exit_batch_rtnl() handlers are called before ->exit() and
+->exit_batch() handlers.
+
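+A minimal sketch of how a pernet user plugs into this (the foo_* names are
+placeholders; the gtp patches later in this queue follow the same shape):
+
+      /* foo_queue_devs_for_unregister() stands in for the driver's own
+       * dellink/queueing helper.
+       */
+      static void __net_exit foo_exit_batch_rtnl(struct list_head *net_list,
+                                                 struct list_head *dev_kill_list)
+      {
+              struct net *net;
+
+              /* RTNL is already held; queue devices on dev_kill_list and
+               * the core issues a single unregister_netdevice_many().
+               */
+              list_for_each_entry(net, net_list, exit_list)
+                      foo_queue_devs_for_unregister(net, dev_kill_list);
+      }
+
+      static struct pernet_operations foo_net_ops = {
+              .exit_batch_rtnl = foo_exit_batch_rtnl,
+      };
+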
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Antoine Tenart <atenart@kernel.org>
+Link: https://lore.kernel.org/r/20240206144313.2050392-2-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 46841c7053e6 ("gtp: Use for_each_netdev_rcu() in gtp_genl_dump_pdp().")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/net_namespace.h |  3 +++
+ net/core/net_namespace.c    | 31 ++++++++++++++++++++++++++++++-
+ 2 files changed, 33 insertions(+), 1 deletion(-)
+
+diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
+index 958c805df1915..1befad79a6734 100644
+--- a/include/net/net_namespace.h
++++ b/include/net/net_namespace.h
+@@ -442,6 +442,9 @@ struct pernet_operations {
+       void (*pre_exit)(struct net *net);
+       void (*exit)(struct net *net);
+       void (*exit_batch)(struct list_head *net_exit_list);
++      /* Following method is called with RTNL held. */
++      void (*exit_batch_rtnl)(struct list_head *net_exit_list,
++                              struct list_head *dev_kill_list);
+       unsigned int *id;
+       size_t size;
+ };
+diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
+index 92b7fea4d495c..70ac9d9bc8770 100644
+--- a/net/core/net_namespace.c
++++ b/net/core/net_namespace.c
+@@ -321,8 +321,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
+ {
+       /* Must be called with pernet_ops_rwsem held */
+       const struct pernet_operations *ops, *saved_ops;
+-      int error = 0;
+       LIST_HEAD(net_exit_list);
++      LIST_HEAD(dev_kill_list);
++      int error = 0;
+       refcount_set(&net->ns.count, 1);
+       ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
+@@ -360,6 +361,15 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
+       synchronize_rcu();
++      ops = saved_ops;
++      rtnl_lock();
++      list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
++              if (ops->exit_batch_rtnl)
++                      ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
++      }
++      unregister_netdevice_many(&dev_kill_list);
++      rtnl_unlock();
++
+       ops = saved_ops;
+       list_for_each_entry_continue_reverse(ops, &pernet_list, list)
+               ops_exit_list(ops, &net_exit_list);
+@@ -588,6 +598,7 @@ static void cleanup_net(struct work_struct *work)
+       struct net *net, *tmp, *last;
+       struct llist_node *net_kill_list;
+       LIST_HEAD(net_exit_list);
++      LIST_HEAD(dev_kill_list);
+       /* Atomically snapshot the list of namespaces to cleanup */
+       net_kill_list = llist_del_all(&cleanup_list);
+@@ -628,6 +639,14 @@ static void cleanup_net(struct work_struct *work)
+        */
+       synchronize_rcu();
++      rtnl_lock();
++      list_for_each_entry_reverse(ops, &pernet_list, list) {
++              if (ops->exit_batch_rtnl)
++                      ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
++      }
++      unregister_netdevice_many(&dev_kill_list);
++      rtnl_unlock();
++
+       /* Run all of the network namespace exit methods */
+       list_for_each_entry_reverse(ops, &pernet_list, list)
+               ops_exit_list(ops, &net_exit_list);
+@@ -1170,7 +1189,17 @@ static void free_exit_list(struct pernet_operations *ops, struct list_head *net_
+ {
+       ops_pre_exit_list(ops, net_exit_list);
+       synchronize_rcu();
++
++      if (ops->exit_batch_rtnl) {
++              LIST_HEAD(dev_kill_list);
++
++              rtnl_lock();
++              ops->exit_batch_rtnl(net_exit_list, &dev_kill_list);
++              unregister_netdevice_many(&dev_kill_list);
++              rtnl_unlock();
++      }
+       ops_exit_list(ops, net_exit_list);
++
+       ops_free_list(ops, net_exit_list);
+ }
+-- 
+2.39.5
+
diff --git a/queue-6.6/net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field.patch b/queue-6.6/net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field.patch
new file mode 100644 (file)
index 0000000..db00dbd
--- /dev/null
@@ -0,0 +1,86 @@
+From d807b9a7220f57ca96ef9055492443dbc8cfdfd3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 8 Jan 2025 22:54:33 +0530
+Subject: net: ethernet: ti: cpsw_ale: Fix cpsw_ale_get_field()
+
+From: Sudheer Kumar Doredla <s-doredla@ti.com>
+
+[ Upstream commit 03d120f27d050336f7e7d21879891542c4741f81 ]
+
+CPSW ALE has 75-bit ALE entries stored across three 32-bit words.
+The cpsw_ale_get_field() and cpsw_ale_set_field() functions support
+ALE field entries spanning at most two words.
+
+The cpsw_ale_get_field() and cpsw_ale_set_field() functions work as
+expected when the ALE field spans word1 and word2, but fail when the
+field spans word2 and word3.
+
+For example, while reading an ALE field that spans word2 and word3
+(i.e. bits 62 to 64), the word3 data is shifted to an incorrect position
+because the index becomes zero when flipped.
+The same issue occurs when setting an ALE entry.
+
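+In numbers, for a field with start = 62 and bits = 3:
+
+      idx  = 62 / 32 = 1;
+      idx2 = (62 + 3 - 1) / 32 = 2;          /* field crosses into word3 */
+
+      /* old code: idx2 = 2 - idx2 = 0, so the shift (idx2 * 32) - start
+       * becomes 0 - 62 and the word3 data lands in the wrong place.
+       * fixed code (see the diff below): keep idx2 for the shift math and
+       * use a separate flipped index only for the array access, since
+       * ale_entry[] stores the highest word first:
+       */
+      index  = 2 - idx2;                               /* = 0 */
+      hi_val = ale_entry[index] << ((idx2 * 32) - 62); /* << 2 */
+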
+This issue has not been seen in practice, but it will become one if the
+driver ever supports accessing ALE fields spanning word2 and word3.
+
+Fix the methods to handle getting/setting fields spanning up to two words.
+
+Fixes: b685f1a58956 ("net: ethernet: ti: cpsw_ale: Fix cpsw_ale_get_field()/cpsw_ale_set_field()")
+Signed-off-by: Sudheer Kumar Doredla <s-doredla@ti.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Roger Quadros <rogerq@kernel.org>
+Reviewed-by: Siddharth Vadapalli <s-vadapalli@ti.com>
+Link: https://patch.msgid.link/20250108172433.311694-1-s-doredla@ti.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/ti/cpsw_ale.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
+index 64bf22cd860c9..9eccc7064c2b0 100644
+--- a/drivers/net/ethernet/ti/cpsw_ale.c
++++ b/drivers/net/ethernet/ti/cpsw_ale.c
+@@ -106,15 +106,15 @@ struct cpsw_ale_dev_id {
+ static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits)
+ {
+-      int idx, idx2;
++      int idx, idx2, index;
+       u32 hi_val = 0;
+       idx    = start / 32;
+       idx2 = (start + bits - 1) / 32;
+       /* Check if bits to be fetched exceed a word */
+       if (idx != idx2) {
+-              idx2 = 2 - idx2; /* flip */
+-              hi_val = ale_entry[idx2] << ((idx2 * 32) - start);
++              index = 2 - idx2; /* flip */
++              hi_val = ale_entry[index] << ((idx2 * 32) - start);
+       }
+       start -= idx * 32;
+       idx    = 2 - idx; /* flip */
+@@ -124,16 +124,16 @@ static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits)
+ static inline void cpsw_ale_set_field(u32 *ale_entry, u32 start, u32 bits,
+                                     u32 value)
+ {
+-      int idx, idx2;
++      int idx, idx2, index;
+       value &= BITMASK(bits);
+       idx = start / 32;
+       idx2 = (start + bits - 1) / 32;
+       /* Check if bits to be set exceed a word */
+       if (idx != idx2) {
+-              idx2 = 2 - idx2; /* flip */
+-              ale_entry[idx2] &= ~(BITMASK(bits + start - (idx2 * 32)));
+-              ale_entry[idx2] |= (value >> ((idx2 * 32) - start));
++              index = 2 - idx2; /* flip */
++              ale_entry[index] &= ~(BITMASK(bits + start - (idx2 * 32)));
++              ale_entry[index] |= (value >> ((idx2 * 32) - start));
+       }
+       start -= idx * 32;
+       idx = 2 - idx; /* flip */
+-- 
+2.39.5
+
diff --git a/queue-6.6/net-fec-handle-page_pool_dev_alloc_pages-error.patch b/queue-6.6/net-fec-handle-page_pool_dev_alloc_pages-error.patch
new file mode 100644 (file)
index 0000000..59b6f71
--- /dev/null
@@ -0,0 +1,96 @@
+From 07efee4a2626c19ea8f3542680a98467ccb1330b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Jan 2025 10:48:45 -0500
+Subject: net: fec: handle page_pool_dev_alloc_pages error
+
+From: Kevin Groeneveld <kgroeneveld@lenbrook.com>
+
+[ Upstream commit 001ba0902046cb6c352494df610718c0763e77a5 ]
+
+The fec_enet_update_cbd function calls page_pool_dev_alloc_pages but does
+not handle the case where it returns NULL. There is a WARN_ON(!new_page),
+but the code still proceeds to use the NULL pointer and then crashes.
+
+This case does seem somewhat rare but when the system is under memory
+pressure it can happen. One case where I can duplicate this with some
+frequency is when writing over a smbd share to a SATA HDD attached to an
+imx6q.
+
+Setting /proc/sys/vm/min_free_kbytes to higher values also seems to solve
+the problem for my test case. But it still seems wrong that the fec driver
+ignores the memory allocation error and can crash.
+
+This commit handles the allocation error by dropping the current packet.
+
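+The resulting RX flow, condensed from the diff below:
+
+      cbd_bufaddr = bdp->cbd_bufaddr;         /* remember the old buffer */
+      if (fec_enet_update_cbd(rxq, bdp, index)) {
+              /* no replacement page available: drop this frame */
+              ndev->stats.rx_dropped++;
+              goto rx_processing_done;
+      }
+
+      /* sync against the buffer that was actually filled by hardware */
+      dma_sync_single_for_cpu(&fep->pdev->dev, fec32_to_cpu(cbd_bufaddr),
+                              pkt_len, DMA_FROM_DEVICE);
+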
+Fixes: 95698ff6177b5 ("net: fec: using page pool to manage RX buffers")
+Signed-off-by: Kevin Groeneveld <kgroeneveld@lenbrook.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Reviewed-by: Wei Fang <wei.fang@nxp.com>
+Link: https://patch.msgid.link/20250113154846.1765414-1-kgroeneveld@lenbrook.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/fec_main.c | 19 ++++++++++++++-----
+ 1 file changed, 14 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
+index e8d9a0eba4d6b..8f5cc1f233188 100644
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -1572,19 +1572,22 @@ static void fec_enet_tx(struct net_device *ndev, int budget)
+               fec_enet_tx_queue(ndev, i, budget);
+ }
+-static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
++static int fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
+                               struct bufdesc *bdp, int index)
+ {
+       struct page *new_page;
+       dma_addr_t phys_addr;
+       new_page = page_pool_dev_alloc_pages(rxq->page_pool);
+-      WARN_ON(!new_page);
+-      rxq->rx_skb_info[index].page = new_page;
++      if (unlikely(!new_page))
++              return -ENOMEM;
++      rxq->rx_skb_info[index].page = new_page;
+       rxq->rx_skb_info[index].offset = FEC_ENET_XDP_HEADROOM;
+       phys_addr = page_pool_get_dma_addr(new_page) + FEC_ENET_XDP_HEADROOM;
+       bdp->cbd_bufaddr = cpu_to_fec32(phys_addr);
++
++      return 0;
+ }
+ static u32
+@@ -1679,6 +1682,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
+       int cpu = smp_processor_id();
+       struct xdp_buff xdp;
+       struct page *page;
++      __fec32 cbd_bufaddr;
+       u32 sub_len = 4;
+ #if !defined(CONFIG_M5272)
+@@ -1743,12 +1747,17 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
+               index = fec_enet_get_bd_index(bdp, &rxq->bd);
+               page = rxq->rx_skb_info[index].page;
++              cbd_bufaddr = bdp->cbd_bufaddr;
++              if (fec_enet_update_cbd(rxq, bdp, index)) {
++                      ndev->stats.rx_dropped++;
++                      goto rx_processing_done;
++              }
++
+               dma_sync_single_for_cpu(&fep->pdev->dev,
+-                                      fec32_to_cpu(bdp->cbd_bufaddr),
++                                      fec32_to_cpu(cbd_bufaddr),
+                                       pkt_len,
+                                       DMA_FROM_DEVICE);
+               prefetch(page_address(page));
+-              fec_enet_update_cbd(rxq, bdp, index);
+               if (xdp_prog) {
+                       xdp_buff_clear_frags_flag(&xdp);
+-- 
+2.39.5
+
diff --git a/queue-6.6/net-mlx5-clear-port-select-structure-when-fail-to-cr.patch b/queue-6.6/net-mlx5-clear-port-select-structure-when-fail-to-cr.patch
new file mode 100644 (file)
index 0000000..95257ec
--- /dev/null
@@ -0,0 +1,107 @@
+From ec4e60f815946e856a88ea2ea2342d362e635c14 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jan 2025 13:39:07 +0200
+Subject: net/mlx5: Clear port select structure when fail to create
+
+From: Mark Zhang <markzhang@nvidia.com>
+
+[ Upstream commit 5641e82cb55b4ecbc6366a499300917d2f3e6790 ]
+
+Clear the port select structure on error so that no stale values are left
+after the definers are destroyed. This is needed because
+mlx5_lag_destroy_definers() always tries to destroy all lag definers in the
+tt_map, so in the flow below the lag definers get double-destroyed and cause
+a kernel crash:
+
+  mlx5_lag_port_sel_create()
+    mlx5_lag_create_definers()
+      mlx5_lag_create_definer()     <- Failed on tt 1
+        mlx5_lag_destroy_definers() <- definers[tt=0] gets destroyed
+  mlx5_lag_port_sel_create()
+    mlx5_lag_create_definers()
+      mlx5_lag_create_definer()     <- Failed on tt 0
+        mlx5_lag_destroy_definers() <- definers[tt=0] gets double-destroyed
+
+ Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008
+ Mem abort info:
+   ESR = 0x0000000096000005
+   EC = 0x25: DABT (current EL), IL = 32 bits
+   SET = 0, FnV = 0
+   EA = 0, S1PTW = 0
+   FSC = 0x05: level 1 translation fault
+ Data abort info:
+   ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
+   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+ user pgtable: 64k pages, 48-bit VAs, pgdp=0000000112ce2e00
+ [0000000000000008] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000
+ Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP
+ Modules linked in: iptable_raw bonding ip_gre ip6_gre gre ip6_tunnel tunnel6 geneve ip6_udp_tunnel udp_tunnel ipip tunnel4 ip_tunnel rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_fwctl(OE) fwctl(OE) mlx5_core(OE) mlxdevm(OE) ib_core(OE) mlxfw(OE) memtrack(OE) mlx_compat(OE) openvswitch nsh nf_conncount psample xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xfrm_user xfrm_algo xt_addrtype iptable_filter iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter bridge stp llc netconsole overlay efi_pstore sch_fq_codel zram ip_tables crct10dif_ce qemu_fw_cfg fuse ipv6 crc_ccitt [last unloaded: mlx_compat(OE)]
+  CPU: 3 UID: 0 PID: 217 Comm: kworker/u53:2 Tainted: G           OE      6.11.0+ #2
+  Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE
+  Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
+  Workqueue: mlx5_lag mlx5_do_bond_work [mlx5_core]
+  pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+  pc : mlx5_del_flow_rules+0x24/0x2c0 [mlx5_core]
+  lr : mlx5_lag_destroy_definer+0x54/0x100 [mlx5_core]
+  sp : ffff800085fafb00
+  x29: ffff800085fafb00 x28: ffff0000da0c8000 x27: 0000000000000000
+  x26: ffff0000da0c8000 x25: ffff0000da0c8000 x24: ffff0000da0c8000
+  x23: ffff0000c31f81a0 x22: 0400000000000000 x21: ffff0000da0c8000
+  x20: 0000000000000000 x19: 0000000000000001 x18: 0000000000000000
+  x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffff8b0c9350
+  x14: 0000000000000000 x13: ffff800081390d18 x12: ffff800081dc3cc0
+  x11: 0000000000000001 x10: 0000000000000b10 x9 : ffff80007ab7304c
+  x8 : ffff0000d00711f0 x7 : 0000000000000004 x6 : 0000000000000190
+  x5 : ffff00027edb3010 x4 : 0000000000000000 x3 : 0000000000000000
+  x2 : ffff0000d39b8000 x1 : ffff0000d39b8000 x0 : 0400000000000000
+  Call trace:
+   mlx5_del_flow_rules+0x24/0x2c0 [mlx5_core]
+   mlx5_lag_destroy_definer+0x54/0x100 [mlx5_core]
+   mlx5_lag_destroy_definers+0xa0/0x108 [mlx5_core]
+   mlx5_lag_port_sel_create+0x2d4/0x6f8 [mlx5_core]
+   mlx5_activate_lag+0x60c/0x6f8 [mlx5_core]
+   mlx5_do_bond_work+0x284/0x5c8 [mlx5_core]
+   process_one_work+0x170/0x3e0
+   worker_thread+0x2d8/0x3e0
+   kthread+0x11c/0x128
+   ret_from_fork+0x10/0x20
+  Code: a9025bf5 aa0003f6 a90363f7 f90023f9 (f9400400)
+  ---[ end trace 0000000000000000 ]---
+
+Fixes: dc48516ec7d3 ("net/mlx5: Lag, add support to create definers for LAG")
+Signed-off-by: Mark Zhang <markzhang@nvidia.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+index 005661248c7e9..9faa9ef863a1b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+@@ -540,7 +540,7 @@ int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
+       set_tt_map(port_sel, hash_type);
+       err = mlx5_lag_create_definers(ldev, hash_type, ports);
+       if (err)
+-              return err;
++              goto clear_port_sel;
+       if (port_sel->tunnel) {
+               err = mlx5_lag_create_inner_ttc_table(ldev);
+@@ -559,6 +559,8 @@ int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
+               mlx5_destroy_ttc_table(port_sel->inner.ttc);
+ destroy_definers:
+       mlx5_lag_destroy_definers(ldev);
++clear_port_sel:
++      memset(port_sel, 0, sizeof(*port_sel));
+       return err;
+ }
+-- 
+2.39.5
+
diff --git a/queue-6.6/net-mlx5-fix-rdma-tx-steering-prio.patch b/queue-6.6/net-mlx5-fix-rdma-tx-steering-prio.patch
new file mode 100644 (file)
index 0000000..043b788
--- /dev/null
@@ -0,0 +1,39 @@
+From a5d363e14c7948b66d9048ff95a45b1006188ac7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jan 2025 13:39:04 +0200
+Subject: net/mlx5: Fix RDMA TX steering prio
+
+From: Patrisious Haddad <phaddad@nvidia.com>
+
+[ Upstream commit c08d3e62b2e73e14da318a1d20b52d0486a28ee0 ]
+
+User-added steering rules at RDMA_TX were being added to the first prio,
+which is the counters prio.
+Fix that so that they are correctly added to the BYPASS_PRIO instead.
+
+Fixes: 24670b1a3166 ("net/mlx5: Add support for RDMA TX steering")
+Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+index 474e63d02ba49..d2dc375f5e49c 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+@@ -2490,6 +2490,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
+               break;
+       case MLX5_FLOW_NAMESPACE_RDMA_TX:
+               root_ns = steering->rdma_tx_root_ns;
++              prio = RDMA_TX_BYPASS_PRIO;
+               break;
+       case MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS:
+               root_ns = steering->rdma_rx_root_ns;
+-- 
+2.39.5
+
diff --git a/queue-6.6/net-mlx5e-always-start-ipsec-sequence-number-from-1.patch b/queue-6.6/net-mlx5e-always-start-ipsec-sequence-number-from-1.patch
new file mode 100644 (file)
index 0000000..5defaa1
--- /dev/null
@@ -0,0 +1,85 @@
+From 98851af5ec364ba1374c298abea6768dbf640551 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jan 2025 13:39:10 +0200
+Subject: net/mlx5e: Always start IPsec sequence number from 1
+
+From: Leon Romanovsky <leonro@nvidia.com>
+
+[ Upstream commit 7f95b0247764acd739d949ff247db4b76138e55a ]
+
+According to RFC4303, section "3.3.3. Sequence Number Generation",
+the first packet sent using a given SA will contain a sequence
+number of 1.
+
+This is applicable to both ESN and non-ESN mode, which was not covered
+by the commit mentioned in the Fixes line.
+
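+The two places touched by the diff below, in short:
+
+      /* SA add: non-ESN SAs also start their sequence number at 1 */
+      if (x->props.flags & XFRM_STATE_ESN)
+              mlx5e_ipsec_update_esn_state(sa_entry);
+      else
+              sa_entry->esn_state.esn = 1;
+
+      /* TX offload object: program that initial SN into the ASO context
+       * when ESN is not in use
+       */
+      if (!attrs->replay_esn.trigger)
+              MLX5_SET(ipsec_aso, aso_ctx, mode_parameter,
+                       sa_entry->esn_state.esn);
+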
+Fixes: 3d42c8cc67a8 ("net/mlx5e: Ensure that IPsec sequence packet number starts from 1")
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c  |  6 ++++++
+ .../mellanox/mlx5/core/en_accel/ipsec_offload.c       | 11 ++++++++---
+ 2 files changed, 14 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+index 9fc6dbc83d141..463c23ae0ad1e 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+@@ -719,6 +719,12 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
+       /* check esn */
+       if (x->props.flags & XFRM_STATE_ESN)
+               mlx5e_ipsec_update_esn_state(sa_entry);
++      else
++              /* According to RFC4303, section "3.3.3. Sequence Number Generation",
++               * the first packet sent using a given SA will contain a sequence
++               * number of 1.
++               */
++              sa_entry->esn_state.esn = 1;
+       mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+index de83567aae791..940e350058d10 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+@@ -90,8 +90,9 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
+ EXPORT_SYMBOL_GPL(mlx5_ipsec_device_caps);
+ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn,
+-                                   struct mlx5_accel_esp_xfrm_attrs *attrs)
++                                   struct mlx5e_ipsec_sa_entry *sa_entry)
+ {
++      struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
+       void *aso_ctx;
+       aso_ctx = MLX5_ADDR_OF(ipsec_obj, obj, ipsec_aso);
+@@ -119,8 +120,12 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn,
+        * active.
+        */
+       MLX5_SET(ipsec_obj, obj, aso_return_reg, MLX5_IPSEC_ASO_REG_C_4_5);
+-      if (attrs->dir == XFRM_DEV_OFFLOAD_OUT)
++      if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) {
+               MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_INC_SN);
++              if (!attrs->replay_esn.trigger)
++                      MLX5_SET(ipsec_aso, aso_ctx, mode_parameter,
++                               sa_entry->esn_state.esn);
++      }
+       if (attrs->lft.hard_packet_limit != XFRM_INF) {
+               MLX5_SET(ipsec_aso, aso_ctx, remove_flow_pkt_cnt,
+@@ -173,7 +178,7 @@ static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry)
+       res = &mdev->mlx5e_res.hw_objs;
+       if (attrs->type == XFRM_DEV_OFFLOAD_PACKET)
+-              mlx5e_ipsec_packet_setup(obj, res->pdn, attrs);
++              mlx5e_ipsec_packet_setup(obj, res->pdn, sa_entry);
+       err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+       if (!err)
+-- 
+2.39.5
+
diff --git a/queue-6.6/net-mlx5e-fix-inversion-dependency-warning-while-ena.patch b/queue-6.6/net-mlx5e-fix-inversion-dependency-warning-while-ena.patch
new file mode 100644 (file)
index 0000000..ea6516f
--- /dev/null
@@ -0,0 +1,342 @@
+From 74c5fb1aad987ec8035d576ae1cddc09b3e7d215 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jan 2025 13:39:08 +0200
+Subject: net/mlx5e: Fix inversion dependency warning while enabling IPsec
+ tunnel
+
+From: Leon Romanovsky <leonro@nvidia.com>
+
+[ Upstream commit 2c3688090f8a1f085230aa839cc63e4a7b977df0 ]
+
+Attempting to enable IPsec packet offload in tunnel mode on a debug kernel
+generates the following kernel panic, which happens due to two issues:
+1. In the SA add section, the _bh() variant should be used when marking
+the SA mode.
+2. The flush_workqueue() in the SA delete routine is not needed: at this
+stage the SA has already been removed from the SADB, and the running work
+will be canceled later when the SA is freed.
+
+ =====================================================
+ WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected
+ 6.12.0+ #4 Not tainted
+ -----------------------------------------------------
+ charon/1337 [HC0[0]:SC0[4]:HE1:SE0] is trying to acquire:
+ ffff88810f365020 (&xa->xa_lock#24){+.+.}-{3:3}, at: mlx5e_xfrm_del_state+0xca/0x1e0 [mlx5_core]
+
+ and this task is already holding:
+ ffff88813e0f0d48 (&x->lock){+.-.}-{3:3}, at: xfrm_state_delete+0x16/0x30
+ which would create a new lock dependency:
+  (&x->lock){+.-.}-{3:3} -> (&xa->xa_lock#24){+.+.}-{3:3}
+
+ but this new dependency connects a SOFTIRQ-irq-safe lock:
+  (&x->lock){+.-.}-{3:3}
+
+ ... which became SOFTIRQ-irq-safe at:
+   lock_acquire+0x1be/0x520
+   _raw_spin_lock_bh+0x34/0x40
+   xfrm_timer_handler+0x91/0xd70
+   __hrtimer_run_queues+0x1dd/0xa60
+   hrtimer_run_softirq+0x146/0x2e0
+   handle_softirqs+0x266/0x860
+   irq_exit_rcu+0x115/0x1a0
+   sysvec_apic_timer_interrupt+0x6e/0x90
+   asm_sysvec_apic_timer_interrupt+0x16/0x20
+   default_idle+0x13/0x20
+   default_idle_call+0x67/0xa0
+   do_idle+0x2da/0x320
+   cpu_startup_entry+0x50/0x60
+   start_secondary+0x213/0x2a0
+   common_startup_64+0x129/0x138
+
+ to a SOFTIRQ-irq-unsafe lock:
+  (&xa->xa_lock#24){+.+.}-{3:3}
+
+ ... which became SOFTIRQ-irq-unsafe at:
+ ...
+   lock_acquire+0x1be/0x520
+   _raw_spin_lock+0x2c/0x40
+   xa_set_mark+0x70/0x110
+   mlx5e_xfrm_add_state+0xe48/0x2290 [mlx5_core]
+   xfrm_dev_state_add+0x3bb/0xd70
+   xfrm_add_sa+0x2451/0x4a90
+   xfrm_user_rcv_msg+0x493/0x880
+   netlink_rcv_skb+0x12e/0x380
+   xfrm_netlink_rcv+0x6d/0x90
+   netlink_unicast+0x42f/0x740
+   netlink_sendmsg+0x745/0xbe0
+   __sock_sendmsg+0xc5/0x190
+   __sys_sendto+0x1fe/0x2c0
+   __x64_sys_sendto+0xdc/0x1b0
+   do_syscall_64+0x6d/0x140
+   entry_SYSCALL_64_after_hwframe+0x4b/0x53
+
+ other info that might help us debug this:
+
+  Possible interrupt unsafe locking scenario:
+
+        CPU0                    CPU1
+        ----                    ----
+   lock(&xa->xa_lock#24);
+                                local_irq_disable();
+                                lock(&x->lock);
+                                lock(&xa->xa_lock#24);
+   <Interrupt>
+     lock(&x->lock);
+
+  *** DEADLOCK ***
+
+ 2 locks held by charon/1337:
+  #0: ffffffff87f8f858 (&net->xfrm.xfrm_cfg_mutex){+.+.}-{4:4}, at: xfrm_netlink_rcv+0x5e/0x90
+  #1: ffff88813e0f0d48 (&x->lock){+.-.}-{3:3}, at: xfrm_state_delete+0x16/0x30
+
+ the dependencies between SOFTIRQ-irq-safe lock and the holding lock:
+ -> (&x->lock){+.-.}-{3:3} ops: 29 {
+    HARDIRQ-ON-W at:
+                     lock_acquire+0x1be/0x520
+                     _raw_spin_lock_bh+0x34/0x40
+                     xfrm_alloc_spi+0xc0/0xe60
+                     xfrm_alloc_userspi+0x5f6/0xbc0
+                     xfrm_user_rcv_msg+0x493/0x880
+                     netlink_rcv_skb+0x12e/0x380
+                     xfrm_netlink_rcv+0x6d/0x90
+                     netlink_unicast+0x42f/0x740
+                     netlink_sendmsg+0x745/0xbe0
+                     __sock_sendmsg+0xc5/0x190
+                     __sys_sendto+0x1fe/0x2c0
+                     __x64_sys_sendto+0xdc/0x1b0
+                     do_syscall_64+0x6d/0x140
+                     entry_SYSCALL_64_after_hwframe+0x4b/0x53
+    IN-SOFTIRQ-W at:
+                     lock_acquire+0x1be/0x520
+                     _raw_spin_lock_bh+0x34/0x40
+                     xfrm_timer_handler+0x91/0xd70
+                     __hrtimer_run_queues+0x1dd/0xa60
+                     hrtimer_run_softirq+0x146/0x2e0
+                     handle_softirqs+0x266/0x860
+                     irq_exit_rcu+0x115/0x1a0
+                     sysvec_apic_timer_interrupt+0x6e/0x90
+                     asm_sysvec_apic_timer_interrupt+0x16/0x20
+                     default_idle+0x13/0x20
+                     default_idle_call+0x67/0xa0
+                     do_idle+0x2da/0x320
+                     cpu_startup_entry+0x50/0x60
+                     start_secondary+0x213/0x2a0
+                     common_startup_64+0x129/0x138
+    INITIAL USE at:
+                    lock_acquire+0x1be/0x520
+                    _raw_spin_lock_bh+0x34/0x40
+                    xfrm_alloc_spi+0xc0/0xe60
+                    xfrm_alloc_userspi+0x5f6/0xbc0
+                    xfrm_user_rcv_msg+0x493/0x880
+                    netlink_rcv_skb+0x12e/0x380
+                    xfrm_netlink_rcv+0x6d/0x90
+                    netlink_unicast+0x42f/0x740
+                    netlink_sendmsg+0x745/0xbe0
+                    __sock_sendmsg+0xc5/0x190
+                    __sys_sendto+0x1fe/0x2c0
+                    __x64_sys_sendto+0xdc/0x1b0
+                    do_syscall_64+0x6d/0x140
+                    entry_SYSCALL_64_after_hwframe+0x4b/0x53
+  }
+  ... key      at: [<ffffffff87f9cd20>] __key.18+0x0/0x40
+
+ the dependencies between the lock to be acquired
+  and SOFTIRQ-irq-unsafe lock:
+ -> (&xa->xa_lock#24){+.+.}-{3:3} ops: 9 {
+    HARDIRQ-ON-W at:
+                     lock_acquire+0x1be/0x520
+                     _raw_spin_lock_bh+0x34/0x40
+                     mlx5e_xfrm_add_state+0xc5b/0x2290 [mlx5_core]
+                     xfrm_dev_state_add+0x3bb/0xd70
+                     xfrm_add_sa+0x2451/0x4a90
+                     xfrm_user_rcv_msg+0x493/0x880
+                     netlink_rcv_skb+0x12e/0x380
+                     xfrm_netlink_rcv+0x6d/0x90
+                     netlink_unicast+0x42f/0x740
+                     netlink_sendmsg+0x745/0xbe0
+                     __sock_sendmsg+0xc5/0x190
+                     __sys_sendto+0x1fe/0x2c0
+                     __x64_sys_sendto+0xdc/0x1b0
+                     do_syscall_64+0x6d/0x140
+                     entry_SYSCALL_64_after_hwframe+0x4b/0x53
+    SOFTIRQ-ON-W at:
+                     lock_acquire+0x1be/0x520
+                     _raw_spin_lock+0x2c/0x40
+                     xa_set_mark+0x70/0x110
+                     mlx5e_xfrm_add_state+0xe48/0x2290 [mlx5_core]
+                     xfrm_dev_state_add+0x3bb/0xd70
+                     xfrm_add_sa+0x2451/0x4a90
+                     xfrm_user_rcv_msg+0x493/0x880
+                     netlink_rcv_skb+0x12e/0x380
+                     xfrm_netlink_rcv+0x6d/0x90
+                     netlink_unicast+0x42f/0x740
+                     netlink_sendmsg+0x745/0xbe0
+                     __sock_sendmsg+0xc5/0x190
+                     __sys_sendto+0x1fe/0x2c0
+                     __x64_sys_sendto+0xdc/0x1b0
+                     do_syscall_64+0x6d/0x140
+                     entry_SYSCALL_64_after_hwframe+0x4b/0x53
+    INITIAL USE at:
+                    lock_acquire+0x1be/0x520
+                    _raw_spin_lock_bh+0x34/0x40
+                    mlx5e_xfrm_add_state+0xc5b/0x2290 [mlx5_core]
+                    xfrm_dev_state_add+0x3bb/0xd70
+                    xfrm_add_sa+0x2451/0x4a90
+                    xfrm_user_rcv_msg+0x493/0x880
+                    netlink_rcv_skb+0x12e/0x380
+                    xfrm_netlink_rcv+0x6d/0x90
+                    netlink_unicast+0x42f/0x740
+                    netlink_sendmsg+0x745/0xbe0
+                    __sock_sendmsg+0xc5/0x190
+                    __sys_sendto+0x1fe/0x2c0
+                    __x64_sys_sendto+0xdc/0x1b0
+                    do_syscall_64+0x6d/0x140
+                    entry_SYSCALL_64_after_hwframe+0x4b/0x53
+  }
+  ... key      at: [<ffffffffa078ff60>] __key.48+0x0/0xfffffffffff210a0 [mlx5_core]
+  ... acquired at:
+    __lock_acquire+0x30a0/0x5040
+    lock_acquire+0x1be/0x520
+    _raw_spin_lock_bh+0x34/0x40
+    mlx5e_xfrm_del_state+0xca/0x1e0 [mlx5_core]
+    xfrm_dev_state_delete+0x90/0x160
+    __xfrm_state_delete+0x662/0xae0
+    xfrm_state_delete+0x1e/0x30
+    xfrm_del_sa+0x1c2/0x340
+    xfrm_user_rcv_msg+0x493/0x880
+    netlink_rcv_skb+0x12e/0x380
+    xfrm_netlink_rcv+0x6d/0x90
+    netlink_unicast+0x42f/0x740
+    netlink_sendmsg+0x745/0xbe0
+    __sock_sendmsg+0xc5/0x190
+    __sys_sendto+0x1fe/0x2c0
+    __x64_sys_sendto+0xdc/0x1b0
+    do_syscall_64+0x6d/0x140
+    entry_SYSCALL_64_after_hwframe+0x4b/0x53
+
+ stack backtrace:
+ CPU: 7 UID: 0 PID: 1337 Comm: charon Not tainted 6.12.0+ #4
+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+ Call Trace:
+  <TASK>
+  dump_stack_lvl+0x74/0xd0
+  check_irq_usage+0x12e8/0x1d90
+  ? print_shortest_lock_dependencies_backwards+0x1b0/0x1b0
+  ? check_chain_key+0x1bb/0x4c0
+  ? __lockdep_reset_lock+0x180/0x180
+  ? check_path.constprop.0+0x24/0x50
+  ? mark_lock+0x108/0x2fb0
+  ? print_circular_bug+0x9b0/0x9b0
+  ? mark_lock+0x108/0x2fb0
+  ? print_usage_bug.part.0+0x670/0x670
+  ? check_prev_add+0x1c4/0x2310
+  check_prev_add+0x1c4/0x2310
+  __lock_acquire+0x30a0/0x5040
+  ? lockdep_set_lock_cmp_fn+0x190/0x190
+  ? lockdep_set_lock_cmp_fn+0x190/0x190
+  lock_acquire+0x1be/0x520
+  ? mlx5e_xfrm_del_state+0xca/0x1e0 [mlx5_core]
+  ? lockdep_hardirqs_on_prepare+0x400/0x400
+  ? __xfrm_state_delete+0x5f0/0xae0
+  ? lock_downgrade+0x6b0/0x6b0
+  _raw_spin_lock_bh+0x34/0x40
+  ? mlx5e_xfrm_del_state+0xca/0x1e0 [mlx5_core]
+  mlx5e_xfrm_del_state+0xca/0x1e0 [mlx5_core]
+  xfrm_dev_state_delete+0x90/0x160
+  __xfrm_state_delete+0x662/0xae0
+  xfrm_state_delete+0x1e/0x30
+  xfrm_del_sa+0x1c2/0x340
+  ? xfrm_get_sa+0x250/0x250
+  ? check_chain_key+0x1bb/0x4c0
+  xfrm_user_rcv_msg+0x493/0x880
+  ? copy_sec_ctx+0x270/0x270
+  ? check_chain_key+0x1bb/0x4c0
+  ? lockdep_set_lock_cmp_fn+0x190/0x190
+  ? lockdep_set_lock_cmp_fn+0x190/0x190
+  netlink_rcv_skb+0x12e/0x380
+  ? copy_sec_ctx+0x270/0x270
+  ? netlink_ack+0xd90/0xd90
+  ? netlink_deliver_tap+0xcd/0xb60
+  xfrm_netlink_rcv+0x6d/0x90
+  netlink_unicast+0x42f/0x740
+  ? netlink_attachskb+0x730/0x730
+  ? lock_acquire+0x1be/0x520
+  netlink_sendmsg+0x745/0xbe0
+  ? netlink_unicast+0x740/0x740
+  ? __might_fault+0xbb/0x170
+  ? netlink_unicast+0x740/0x740
+  __sock_sendmsg+0xc5/0x190
+  ? fdget+0x163/0x1d0
+  __sys_sendto+0x1fe/0x2c0
+  ? __x64_sys_getpeername+0xb0/0xb0
+  ? do_user_addr_fault+0x856/0xe30
+  ? lock_acquire+0x1be/0x520
+  ? __task_pid_nr_ns+0x117/0x410
+  ? lock_downgrade+0x6b0/0x6b0
+  __x64_sys_sendto+0xdc/0x1b0
+  ? lockdep_hardirqs_on_prepare+0x284/0x400
+  do_syscall_64+0x6d/0x140
+  entry_SYSCALL_64_after_hwframe+0x4b/0x53
+ RIP: 0033:0x7f7d31291ba4
+ Code: 7d e8 89 4d d4 e8 4c 42 f7 ff 44 8b 4d d0 4c 8b 45 c8 89 c3 44 8b 55 d4 8b 7d e8 b8 2c 00 00 00 48 8b 55 d8 48 8b 75 e0 0f 05 <48> 3d 00 f0 ff ff 77 34 89 df 48 89 45 e8 e8 99 42 f7 ff 48 8b 45
+ RSP: 002b:00007f7d2ccd94f0 EFLAGS: 00000297 ORIG_RAX: 000000000000002c
+ RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f7d31291ba4
+ RDX: 0000000000000028 RSI: 00007f7d2ccd96a0 RDI: 000000000000000a
+ RBP: 00007f7d2ccd9530 R08: 00007f7d2ccd9598 R09: 000000000000000c
+ R10: 0000000000000000 R11: 0000000000000297 R12: 0000000000000028
+ R13: 00007f7d2ccd9598 R14: 00007f7d2ccd96a0 R15: 00000000000000e1
+  </TASK>
+
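+For reference, a minimal sketch of the locking pattern the hunk below
+switches to.  Plain xa_set_mark() takes xa_lock with softirqs enabled,
+which lockdep records as SOFTIRQ-unsafe; taking the lock with the _bh
+variant keeps the xarray lock out of that class.  Illustrative only,
+with &xa and index as placeholders (the real calls are in the diff):
+
+    xa_lock_bh(&xa);                      /* softirqs disabled        */
+    __xa_set_mark(&xa, index, XA_MARK_0); /* mark set under the lock  */
+    xa_unlock_bh(&xa);
+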
+Fixes: 4c24272b4e2b ("net/mlx5e: Listen to ARP events to update IPsec L2 headers in tunnel mode")
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 16 ++++++----------
+ 1 file changed, 6 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+index 015faddabc8e0..9fc6dbc83d141 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+@@ -763,9 +763,12 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
+                                  MLX5_IPSEC_RESCHED);
+       if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
+-          x->props.mode == XFRM_MODE_TUNNEL)
+-              xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
+-                          MLX5E_IPSEC_TUNNEL_SA);
++          x->props.mode == XFRM_MODE_TUNNEL) {
++              xa_lock_bh(&ipsec->sadb);
++              __xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
++                            MLX5E_IPSEC_TUNNEL_SA);
++              xa_unlock_bh(&ipsec->sadb);
++      }
+ out:
+       x->xso.offload_handle = (unsigned long)sa_entry;
+@@ -792,7 +795,6 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
+ static void mlx5e_xfrm_del_state(struct xfrm_state *x)
+ {
+       struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+-      struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
+       struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+       struct mlx5e_ipsec_sa_entry *old;
+@@ -801,12 +803,6 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x)
+       old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
+       WARN_ON(old != sa_entry);
+-
+-      if (attrs->mode == XFRM_MODE_TUNNEL &&
+-          attrs->type == XFRM_DEV_OFFLOAD_PACKET)
+-              /* Make sure that no ARP requests are running in parallel */
+-              flush_workqueue(ipsec->wq);
+-
+ }
+ static void mlx5e_xfrm_free_state(struct xfrm_state *x)
+-- 
+2.39.5
+
diff --git a/queue-6.6/net-mlx5e-rely-on-reqid-in-ipsec-tunnel-mode.patch b/queue-6.6/net-mlx5e-rely-on-reqid-in-ipsec-tunnel-mode.patch
new file mode 100644 (file)
index 0000000..494f45c
--- /dev/null
@@ -0,0 +1,87 @@
+From 8dcacf66eeb1186c517b4a791f43b27095ee2413 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jan 2025 13:39:09 +0200
+Subject: net/mlx5e: Rely on reqid in IPsec tunnel mode
+
+From: Leon Romanovsky <leonro@nvidia.com>
+
+[ Upstream commit 25f23524dfa227959beb3b2c2c0f38e0222f4cfa ]
+
+All packet offload SAs carry a reqid to make sure they have a
+corresponding policy. While it is not strictly needed for transport
+mode, it is extremely important in tunnel mode. In that mode, policy
+and SAs have different match criteria.
+
+The policy matches the whole subnet addresses, while the SA matches the
+tunnel gateway addresses. The source address of such a tunnel is not
+known during egress packet traversal in flow steering, as it is added
+only after successful encryption.
+
+Since reqid is required for packet offload and is unique for every SA,
+we can safely rely on it alone.
+
+The output below shows the egress policy and SA configured by
+strongswan:
+
+[leonro@vm ~]$ sudo ip x s
+src 192.169.101.2 dst 192.169.101.1
+        proto esp spi 0xc88b7652 reqid 1 mode tunnel
+        replay-window 0 flag af-unspec esn
+        aead rfc4106(gcm(aes)) 0xe406a01083986e14d116488549094710e9c57bc6 128
+        anti-replay esn context:
+         seq-hi 0x0, seq 0x0, oseq-hi 0x0, oseq 0x0
+         replay_window 1, bitmap-length 1
+         00000000
+        crypto offload parameters: dev eth2 dir out mode packet
+
+[leonro@064 ~]$ sudo ip x p
+src 192.170.0.0/16 dst 192.170.0.0/16
+        dir out priority 383615 ptype main
+        tmpl src 192.169.101.2 dst 192.169.101.1
+                proto esp spi 0xc88b7652 reqid 1 mode tunnel
+        crypto offload parameters: dev eth2 mode packet
+
+Fixes: b3beba1fb404 ("net/mlx5e: Allow policies with reqid 0, to support IKE policy holes")
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c  | 12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+index 61288066830d9..2382c71289857 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+@@ -1442,23 +1442,21 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
+               goto err_alloc;
+       }
+-      if (attrs->family == AF_INET)
+-              setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4);
+-      else
+-              setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6);
+-
+       setup_fte_no_frags(spec);
+       setup_fte_upper_proto_match(spec, &attrs->upspec);
+       switch (attrs->type) {
+       case XFRM_DEV_OFFLOAD_CRYPTO:
++              if (attrs->family == AF_INET)
++                      setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4);
++              else
++                      setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6);
+               setup_fte_spi(spec, attrs->spi, false);
+               setup_fte_esp(spec);
+               setup_fte_reg_a(spec);
+               break;
+       case XFRM_DEV_OFFLOAD_PACKET:
+-              if (attrs->reqid)
+-                      setup_fte_reg_c4(spec, attrs->reqid);
++              setup_fte_reg_c4(spec, attrs->reqid);
+               err = setup_pkt_reformat(ipsec, attrs, &flow_act);
+               if (err)
+                       goto err_pkt_reformat;
+-- 
+2.39.5
+
diff --git a/queue-6.6/net-xilinx-axienet-fix-irq-coalescing-packet-count-o.patch b/queue-6.6/net-xilinx-axienet-fix-irq-coalescing-packet-count-o.patch
new file mode 100644 (file)
index 0000000..4432809
--- /dev/null
@@ -0,0 +1,48 @@
+From 629d40944c06e11b6793e367b18abb28f25f64d6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Jan 2025 11:30:00 -0500
+Subject: net: xilinx: axienet: Fix IRQ coalescing packet count overflow
+
+From: Sean Anderson <sean.anderson@linux.dev>
+
+[ Upstream commit c17ff476f53afb30f90bb3c2af77de069c81a622 ]
+
+If coalesce_count is greater than 255 it will not fit in the register and
+will overflow. This can be reproduced by running
+
+    # ethtool -C ethX rx-frames 256
+
+which will result in a timeout of 0us instead. Fix this by checking for
+invalid values and reporting an error.
+
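+For illustration only, assuming the packet-count field in the register
+is 8 bits wide (which the new 255 limit implies), the overflow is a
+plain truncation:
+
+    u32 frames = 256;            /* ethtool -C ethX rx-frames 256     */
+    u8  field  = frames & 0xff;  /* only the low 8 bits fit: 256 -> 0 */
+
+Rejecting values above 255 with -EINVAL, as the hunk below does, avoids
+silently programming 0.
+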
+Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver")
+Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
+Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
+Reviewed-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
+Link: https://patch.msgid.link/20250113163001.2335235-1-sean.anderson@linux.dev
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+index 9f779653ed622..02e11827440b5 100644
+--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
++++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+@@ -1571,6 +1571,12 @@ axienet_ethtools_set_coalesce(struct net_device *ndev,
+               return -EFAULT;
+       }
++      if (ecoalesce->rx_max_coalesced_frames > 255 ||
++          ecoalesce->tx_max_coalesced_frames > 255) {
++              NL_SET_ERR_MSG(extack, "frames must be less than 256");
++              return -EINVAL;
++      }
++
+       if (ecoalesce->rx_max_coalesced_frames)
+               lp->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames;
+       if (ecoalesce->rx_coalesce_usecs)
+-- 
+2.39.5
+
diff --git a/queue-6.6/nfp-bpf-prevent-integer-overflow-in-nfp_bpf_event_ou.patch b/queue-6.6/nfp-bpf-prevent-integer-overflow-in-nfp_bpf_event_ou.patch
new file mode 100644 (file)
index 0000000..ef3b9ae
--- /dev/null
@@ -0,0 +1,39 @@
+From 32be1c628637b39ba250cce113dd13bf4aac7e64 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Jan 2025 09:18:39 +0300
+Subject: nfp: bpf: prevent integer overflow in nfp_bpf_event_output()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit 16ebb6f5b6295c9688749862a39a4889c56227f8 ]
+
+The "sizeof(struct cmsg_bpf_event) + pkt_size + data_size" math could
+potentially have an integer wrapping bug on 32bit systems.  Check for
+this and return an error.
+
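+A minimal sketch of the wrap being guarded against, with hypothetical
+sizes and a 32-bit size_t:
+
+    size_t pkt_size  = 0xffffff00;
+    size_t data_size = 0x00000200;
+    /* sizeof(struct cmsg_bpf_event) + pkt_size + data_size wraps to a
+     * small value, so the "len < ..." check passes even though the
+     * individual sizes are huge.  size_add() saturates at SIZE_MAX
+     * instead of wrapping, so the new "> INT_MAX" test rejects it.  */
+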
+Fixes: 9816dd35ecec ("nfp: bpf: perf event output helpers support")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Link: https://patch.msgid.link/6074805b-e78d-4b8a-bf05-e929b5377c28@stanley.mountain
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/netronome/nfp/bpf/offload.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+index 9d97cd281f18e..c03558adda91e 100644
+--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
++++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+@@ -458,7 +458,8 @@ int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data,
+       map_id_full = be64_to_cpu(cbe->map_ptr);
+       map_id = map_id_full;
+-      if (len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
++      if (size_add(pkt_size, data_size) > INT_MAX ||
++          len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
+               return -EINVAL;
+       if (cbe->hdr.ver != NFP_CCM_ABI_VERSION)
+               return -EINVAL;
+-- 
+2.39.5
+
diff --git a/queue-6.6/openvswitch-fix-lockup-on-tx-to-unregistering-netdev.patch b/queue-6.6/openvswitch-fix-lockup-on-tx-to-unregistering-netdev.patch
new file mode 100644 (file)
index 0000000..ad7d80a
--- /dev/null
@@ -0,0 +1,79 @@
+From 366becb8a9e98772d634248f1ca03b5ff7ffdb8d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Jan 2025 13:21:24 +0100
+Subject: openvswitch: fix lockup on tx to unregistering netdev with carrier
+
+From: Ilya Maximets <i.maximets@ovn.org>
+
+[ Upstream commit 47e55e4b410f7d552e43011baa5be1aab4093990 ]
+
+The commit in the Fixes tag attempted to fix the issue in the following
+sequence of calls:
+
+    do_output
+    -> ovs_vport_send
+       -> dev_queue_xmit
+          -> __dev_queue_xmit
+             -> netdev_core_pick_tx
+                -> skb_tx_hash
+
+When the device is unregistering, 'dev->real_num_tx_queues' goes to
+zero and the 'while (unlikely(hash >= qcount))' loop inside
+'skb_tx_hash' becomes infinite, locking up the core forever.
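+
+The loop in question looks roughly like this (see skb_tx_hash() in
+net/core/dev.c):
+
+    while (unlikely(hash >= qcount))
+        hash -= qcount;   /* with qcount == 0 this never makes progress */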
+
+But unfortunately, checking just the carrier status is not enough to
+fix the issue, because some devices may still be in an unregistering
+state while reporting carrier status OK.
+
+One example of such a device is net/dummy.  It sets carrier ON on
+start, but it doesn't implement .ndo_stop to set the carrier off.
+That makes sense, because dummy doesn't really have a carrier.
+Therefore, while this device is unregistering, it is still easy to
+hit the infinite loop in skb_tx_hash() from the OVS datapath.  There
+might be other drivers that do the same, but dummy by itself is
+important for the OVS ecosystem, because it is frequently used as a
+packet sink for tcpdump while debugging OVS deployments.  When the
+issue is hit, the only way to recover is to reboot.
+
+Fix that by also checking if the device is running.  The running
+state is handled by the net core during unregistering, so it covers
+the unregistering case better, and we don't really need to send
+packets to devices that are not running anyway.
+
+While only checking the running state might be enough, the carrier
+check is preserved.  The running and the carrier states seem disjoint
+throughout the code and different drivers, and other core functions
+like __dev_direct_xmit() check both before attempting to transmit
+a packet.  So it seems safer to check both flags in OVS as well.
+
+Fixes: 066b86787fa3 ("net: openvswitch: fix race on port output")
+Reported-by: Friedrich Weber <f.weber@proxmox.com>
+Closes: https://mail.openvswitch.org/pipermail/ovs-discuss/2025-January/053423.html
+Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
+Tested-by: Friedrich Weber <f.weber@proxmox.com>
+Reviewed-by: Aaron Conole <aconole@redhat.com>
+Link: https://patch.msgid.link/20250109122225.4034688-1-i.maximets@ovn.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/openvswitch/actions.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
+index 4f5cbcaa38386..9445ca97163b4 100644
+--- a/net/openvswitch/actions.c
++++ b/net/openvswitch/actions.c
+@@ -918,7 +918,9 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
+ {
+       struct vport *vport = ovs_vport_rcu(dp, out_port);
+-      if (likely(vport && netif_carrier_ok(vport->dev))) {
++      if (likely(vport &&
++                 netif_running(vport->dev) &&
++                 netif_carrier_ok(vport->dev))) {
+               u16 mru = OVS_CB(skb)->mru;
+               u32 cutlen = OVS_CB(skb)->cutlen;
+-- 
+2.39.5
+
diff --git a/queue-6.6/pktgen-avoid-out-of-bounds-access-in-get_imix_entrie.patch b/queue-6.6/pktgen-avoid-out-of-bounds-access-in-get_imix_entrie.patch
new file mode 100644 (file)
index 0000000..bf6646a
--- /dev/null
@@ -0,0 +1,68 @@
+From 0c7155030203dc5b6466b1193d127216d340d83f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Jan 2025 11:30:39 +0300
+Subject: pktgen: Avoid out-of-bounds access in get_imix_entries
+
+From: Artem Chernyshev <artem.chernyshev@red-soft.ru>
+
+[ Upstream commit 76201b5979768500bca362871db66d77cb4c225e ]
+
+Passing a sufficiently large number of imix entries leads to an invalid
+access to the pkt_dev->imix_entries array because of an incorrect
+boundary check.
+
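+Roughly, the old flow stored the entry before checking the limit
+(MAX_IMIX_ENTRIES is 20, matching the array size in the report below):
+
+    pkt_dev->imix_entries[pkt_dev->n_imix_entries].size = size;
+    ...
+    pkt_dev->n_imix_entries++;
+    if (pkt_dev->n_imix_entries > MAX_IMIX_ENTRIES)  /* too late */
+        return -E2BIG;
+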
+UBSAN: array-index-out-of-bounds in net/core/pktgen.c:874:24
+index 20 is out of range for type 'imix_pkt [20]'
+CPU: 2 PID: 1210 Comm: bash Not tainted 6.10.0-rc1 #121
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+Call Trace:
+<TASK>
+dump_stack_lvl lib/dump_stack.c:117
+__ubsan_handle_out_of_bounds lib/ubsan.c:429
+get_imix_entries net/core/pktgen.c:874
+pktgen_if_write net/core/pktgen.c:1063
+pde_write fs/proc/inode.c:334
+proc_reg_write fs/proc/inode.c:346
+vfs_write fs/read_write.c:593
+ksys_write fs/read_write.c:644
+do_syscall_64 arch/x86/entry/common.c:83
+entry_SYSCALL_64_after_hwframe arch/x86/entry/entry_64.S:130
+
+Found by Linux Verification Center (linuxtesting.org) with SVACE.
+
+Fixes: 52a62f8603f9 ("pktgen: Parse internet mix (imix) input")
+Signed-off-by: Artem Chernyshev <artem.chernyshev@red-soft.ru>
+[ fp: allow to fill the array completely; minor changelog cleanup ]
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/pktgen.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/core/pktgen.c b/net/core/pktgen.c
+index 0e472f6fab853..359e24c3f22ca 100644
+--- a/net/core/pktgen.c
++++ b/net/core/pktgen.c
+@@ -850,6 +850,9 @@ static ssize_t get_imix_entries(const char __user *buffer,
+               unsigned long weight;
+               unsigned long size;
++              if (pkt_dev->n_imix_entries >= MAX_IMIX_ENTRIES)
++                      return -E2BIG;
++
+               len = num_arg(&buffer[i], max_digits, &size);
+               if (len < 0)
+                       return len;
+@@ -879,9 +882,6 @@ static ssize_t get_imix_entries(const char __user *buffer,
+               i++;
+               pkt_dev->n_imix_entries++;
+-
+-              if (pkt_dev->n_imix_entries > MAX_IMIX_ENTRIES)
+-                      return -E2BIG;
+       } while (c == ' ');
+       return i;
+-- 
+2.39.5
+
diff --git a/queue-6.6/series b/queue-6.6/series
new file mode 100644 (file)
index 0000000..91cf604
--- /dev/null
@@ -0,0 +1,18 @@
+net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field.patch
+bpf-fix-bpf_sk_select_reuseport-memory-leak.patch
+openvswitch-fix-lockup-on-tx-to-unregistering-netdev.patch
+pktgen-avoid-out-of-bounds-access-in-get_imix_entrie.patch
+net-add-exit_batch_rtnl-method.patch
+gtp-use-exit_batch_rtnl-method.patch
+gtp-use-for_each_netdev_rcu-in-gtp_genl_dump_pdp.patch
+gtp-destroy-device-along-with-udp-socket-s-netns-dis.patch
+nfp-bpf-prevent-integer-overflow-in-nfp_bpf_event_ou.patch
+net-xilinx-axienet-fix-irq-coalescing-packet-count-o.patch
+net-fec-handle-page_pool_dev_alloc_pages-error.patch
+net-mlx5-fix-rdma-tx-steering-prio.patch
+net-mlx5-clear-port-select-structure-when-fail-to-cr.patch
+net-mlx5e-fix-inversion-dependency-warning-while-ena.patch
+net-mlx5e-rely-on-reqid-in-ipsec-tunnel-mode.patch
+net-mlx5e-always-start-ipsec-sequence-number-from-1.patch
+drm-vmwgfx-add-new-keep_resv-bo-param.patch
+drm-v3d-ensure-job-pointer-is-set-to-null-after-job-.patch