git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
net: devmem: refresh devmem TX dst in case of route invalidation
author: Shivaji Kant <shivajikant@google.com>
Wed, 29 Oct 2025 06:54:19 +0000 (06:54 +0000)
committer: Jakub Kicinski <kuba@kernel.org>
Thu, 30 Oct 2025 02:23:21 +0000 (19:23 -0700)
The zero-copy Device Memory (Devmem) transmit path
relies on the socket's route cache (`dst_entry`) to
validate that the packet is being sent via the network
device to which the DMA buffer was bound.

However, this check incorrectly fails and returns `-ENODEV`
if the socket's route cache entry (`dst`) is merely missing
or expired (`dst == NULL`). This scenario is observed during
network events, such as when flow steering rules are deleted,
leading to a temporary route cache invalidation.

This patch fixes the spurious `-ENODEV` error returned by
`net_devmem_get_binding()` by doing the following:

1.  It attempts to rebuild the route via `rebuild_header()`
if the route is initially missing (`dst == NULL`). This
allows the TCP/IP stack to recover from transient route
cache misses.
2.  It uses `rcu_read_lock()` and `dst_dev_rcu()` to safely
access the network device pointer (`dst_dev`) from the
route, preventing use-after-free conditions if the
device is concurrently removed.
3.  It maintains the critical safety check by validating
that the retrieved destination device (`dst_dev`) is
exactly the device registered in the Devmem binding
(`binding->dev`).

These changes prevent unnecessary ENODEV failures while
maintaining the critical safety requirement that the
Devmem resources are only used on the bound network device.

Reviewed-by: Bobby Eshleman <bobbyeshleman@meta.com>
Reported-by: Eric Dumazet <edumazet@google.com>
Reported-by: Vedant Mathur <vedantmathur@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Fixes: bd61848900bf ("net: devmem: Implement TX path")
Signed-off-by: Shivaji Kant <shivajikant@google.com>
Link: https://patch.msgid.link/20251029065420.3489943-1-shivajikant@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/core/devmem.c

index d9de31a6cc7f89da62c8d66b45f9207b6b7f4fc4..1d04754bc756d40a125b68abf2a0a0ceda464992 100644 (file)
@@ -17,6 +17,7 @@
 #include <net/page_pool/helpers.h>
 #include <net/page_pool/memory_provider.h>
 #include <net/sock.h>
+#include <net/tcp.h>
 #include <trace/events/page_pool.h>
 
 #include "devmem.h"
@@ -357,7 +358,8 @@ struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk,
                                                         unsigned int dmabuf_id)
 {
        struct net_devmem_dmabuf_binding *binding;
-       struct dst_entry *dst = __sk_dst_get(sk);
+       struct net_device *dst_dev;
+       struct dst_entry *dst;
        int err = 0;
 
        binding = net_devmem_lookup_dmabuf(dmabuf_id);
@@ -366,16 +368,35 @@ struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk,
                goto out_err;
        }
 
+       rcu_read_lock();
+       dst = __sk_dst_get(sk);
+       /* If dst is NULL (route expired), attempt to rebuild it. */
+       if (unlikely(!dst)) {
+               if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) {
+                       err = -EHOSTUNREACH;
+                       goto out_unlock;
+               }
+               dst = __sk_dst_get(sk);
+               if (unlikely(!dst)) {
+                       err = -ENODEV;
+                       goto out_unlock;
+               }
+       }
+
        /* The dma-addrs in this binding are only reachable to the corresponding
         * net_device.
         */
-       if (!dst || !dst->dev || dst->dev->ifindex != binding->dev->ifindex) {
+       dst_dev = dst_dev_rcu(dst);
+       if (unlikely(!dst_dev) || unlikely(dst_dev != binding->dev)) {
                err = -ENODEV;
-               goto out_err;
+               goto out_unlock;
        }
 
+       rcu_read_unlock();
        return binding;
 
+out_unlock:
+       rcu_read_unlock();
 out_err:
        if (binding)
                net_devmem_dmabuf_binding_put(binding);