git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
net: devmem: support TX over NETMEM_TX_NO_DMA devices
author Bobby Eshleman <bobbyeshleman@meta.com>
Thu, 14 May 2026 17:22:30 +0000 (10:22 -0700)
committer Jakub Kicinski <kuba@kernel.org>
Tue, 19 May 2026 01:49:06 +0000 (18:49 -0700)
When a netkit virtual device leases queues from a physical NIC, devmem
TX bindings created on the netkit device must still result in the dmabuf
being mapped for DMA by the physical device. This patch accomplishes
this by teaching the bind handler to search for the underlying
DMA-capable device by looking it up via leased rx queues. The function
netdev_find_netmem_tx_dev(), used for finding the underlying DMA-capable
device, can be extended to support other non-netkit NETMEM_TX_NO_DMA
devices in the future if needed.

Additionally, this patch extends validate_xmit_unreadable_skb() to
support the netkit case, where the skb is validated twice: once on the
netkit guest device and again on the physical NIC after BPF redirect or
ip forwarding.

Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Signed-off-by: Bobby Eshleman <bobbyeshleman@meta.com>
Link: https://patch.msgid.link/20260514-tcp-dm-netkit-v5-3-408c59b91e66@meta.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/core/dev.c
net/core/devmem.c
net/core/devmem.h
net/core/netdev-genl.c

index 23c1ad3d393da03955741b514c71267b83867a07..26ac8eb9b259d489159c7ab5a2b206d425110b3b 100644 (file)
@@ -3993,7 +3993,8 @@ static struct sk_buff *validate_xmit_unreadable_skb(struct sk_buff *skb,
        struct skb_shared_info *shinfo;
        struct net_iov *niov;
 
-       if (likely(skb_frags_readable(skb)))
+       if (likely(skb_frags_readable(skb) ||
+                  dev->netmem_tx == NETMEM_TX_NO_DMA))
                goto out;
 
        if (dev->netmem_tx == NETMEM_TX_NONE)
index 468344739db29e623726d772157adab50ad3ae3f..893643909f6a4bf00337c11bda3e8718711f7d2e 100644 (file)
@@ -181,7 +181,7 @@ err_close_rxq:
 }
 
 struct net_devmem_dmabuf_binding *
-net_devmem_bind_dmabuf(struct net_device *dev,
+net_devmem_bind_dmabuf(struct net_device *dev, void *vdev,
                       struct device *dma_dev,
                       enum dma_data_direction direction,
                       unsigned int dmabuf_fd, struct netdev_nl_sock *priv,
@@ -212,6 +212,7 @@ net_devmem_bind_dmabuf(struct net_device *dev,
        }
 
        binding->dev = dev;
+       binding->vdev = vdev;
        xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC);
 
        err = percpu_ref_init(&binding->ref,
@@ -396,7 +397,8 @@ struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk,
         */
        dst_dev = dst_dev_rcu(dst);
        if (unlikely(!dst_dev) ||
-           unlikely(dst_dev != READ_ONCE(binding->dev))) {
+           unlikely(dst_dev != READ_ONCE(binding->dev) &&
+                    dst_dev != READ_ONCE(binding->vdev))) {
                err = -ENODEV;
                goto out_unlock;
        }
index 1c5c18581fcb146450cfb18a4b1d0ce8a17b96a9..3852a56036cb40b4cff3d987a11057bdcf28afc8 100644 (file)
@@ -19,7 +19,13 @@ struct net_devmem_dmabuf_binding {
        struct dma_buf *dmabuf;
        struct dma_buf_attachment *attachment;
        struct sg_table *sgt;
+       /* Physical NIC that does the actual DMA for this binding. */
        struct net_device *dev;
+       /* Opaque cookie identifying the virtual device (e.g. netkit) the user
+        * called bind-tx on. Used only for pointer comparison. Never
+        * dereferenced.
+        */
+       void *vdev;
        struct gen_pool *chunk_pool;
        /* Protect dev */
        struct mutex lock;
@@ -84,7 +90,7 @@ struct dmabuf_genpool_chunk_owner {
 
 void __net_devmem_dmabuf_binding_free(struct work_struct *wq);
 struct net_devmem_dmabuf_binding *
-net_devmem_bind_dmabuf(struct net_device *dev,
+net_devmem_bind_dmabuf(struct net_device *dev, void *vdev,
                       struct device *dma_dev,
                       enum dma_data_direction direction,
                       unsigned int dmabuf_fd, struct netdev_nl_sock *priv,
@@ -165,7 +171,7 @@ static inline void net_devmem_put_net_iov(struct net_iov *niov)
 }
 
 static inline struct net_devmem_dmabuf_binding *
-net_devmem_bind_dmabuf(struct net_device *dev,
+net_devmem_bind_dmabuf(struct net_device *dev, void *vdev,
                       struct device *dma_dev,
                       enum dma_data_direction direction,
                       unsigned int dmabuf_fd,
index 4d2c49371cdb4b7768afce05412e60f62069ef76..b4d48f3672a5b1300a1e31d14de68da570768b7c 100644 (file)
@@ -1077,7 +1077,7 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
                goto err_rxq_bitmap;
        }
 
-       binding = net_devmem_bind_dmabuf(netdev, dma_dev, DMA_FROM_DEVICE,
+       binding = net_devmem_bind_dmabuf(netdev, NULL, dma_dev, DMA_FROM_DEVICE,
                                         dmabuf_fd, priv, info->extack);
        if (IS_ERR(binding)) {
                err = PTR_ERR(binding);
@@ -1119,9 +1119,43 @@ err_genlmsg_free:
        return err;
 }
 
+/* Find the DMA-capable device for a netmem TX binding.
+ *
+ * NETMEM_TX_DMA: return @dev itself. NETMEM_TX_NO_DMA: walk @dev's RX queues
+ * and return the first lease's device that is present and NETMEM_TX_DMA.
+ * Return NULL for any other netmem_tx value, or when no such lease exists.
+ */
+static struct net_device *
+netdev_find_netmem_tx_dev(struct net_device *dev)
+{
+       struct netdev_rx_queue *lease_rxq;
+       struct net_device *phys_dev;
+       int i;
+
+       if (dev->netmem_tx == NETMEM_TX_DMA)
+               return dev;
+
+       if (dev->netmem_tx != NETMEM_TX_NO_DMA)
+               return NULL;
+
+       for (i = 0; i < dev->real_num_rx_queues; i++) {
+               lease_rxq = READ_ONCE(__netif_get_rx_queue(dev, i)->lease);
+               if (!lease_rxq)
+                       continue;
+
+               phys_dev = lease_rxq->dev;
+               if (netif_device_present(phys_dev) &&
+                   phys_dev->netmem_tx == NETMEM_TX_DMA)
+                       return phys_dev;
+       }
+
+       return NULL;
+}
+
 int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info)
 {
        struct net_devmem_dmabuf_binding *binding;
+       struct net_device *bind_dev;
        struct netdev_nl_sock *priv;
        struct net_device *netdev;
        struct device *dma_dev;
@@ -1171,22 +1205,41 @@ int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info)
                goto err_unlock_netdev;
        }
 
-       dma_dev = netdev_queue_get_dma_dev(netdev, 0, NETDEV_QUEUE_TYPE_TX);
-       binding = net_devmem_bind_dmabuf(netdev, dma_dev, DMA_TO_DEVICE,
-                                        dmabuf_fd, priv, info->extack);
+       bind_dev = netdev_find_netmem_tx_dev(netdev);
+       if (!bind_dev) {
+               err = -EOPNOTSUPP;
+               NL_SET_ERR_MSG(info->extack,
+                              "No DMA-capable device found for netmem TX");
+               goto err_unlock_netdev;
+       }
+
+       if (bind_dev != netdev)
+               netdev_lock(bind_dev);
+
+       dma_dev = netdev_queue_get_dma_dev(bind_dev, 0, NETDEV_QUEUE_TYPE_TX);
+
+       binding = net_devmem_bind_dmabuf(bind_dev,
+                                        bind_dev != netdev ? netdev : NULL,
+                                        dma_dev, DMA_TO_DEVICE, dmabuf_fd,
+                                        priv, info->extack);
        if (IS_ERR(binding)) {
                err = PTR_ERR(binding);
-               goto err_unlock_netdev;
+               goto err_unlock_bind_dev;
        }
 
        nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id);
        genlmsg_end(rsp, hdr);
 
+       if (bind_dev != netdev)
+               netdev_unlock(bind_dev);
        netdev_unlock(netdev);
        mutex_unlock(&priv->lock);
 
        return genlmsg_reply(rsp, info);
 
+err_unlock_bind_dev:
+       if (bind_dev != netdev)
+               netdev_unlock(bind_dev);
 err_unlock_netdev:
        netdev_unlock(netdev);
 err_unlock_sock: