net: Proxy net_mp_{open,close}_rxq for leased queues
author    David Wei <dw@davidwei.uk>
          Thu, 15 Jan 2026 08:25:52 +0000 (09:25 +0100)
committer Paolo Abeni <pabeni@redhat.com>
          Tue, 20 Jan 2026 10:58:49 +0000 (11:58 +0100)
When a process in a container wants to set up a memory provider, it will
use the virtual netdev and a leased rxq, and call net_mp_{open,close}_rxq
to try to restart the queue. At this point, proxy the queue restart on
the real rxq in the physical netdev.

For both memory providers (io_uring zero-copy rx and devmem), this causes
the real rxq in the physical netdev to be filled from a memory provider
that has DMA-mapped memory from a process within a container.
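
As an aside, a minimal sketch of the container-side usage this enables;
virt_dev, rxq_idx and the provider fields are illustrative placeholders,
only the net_mp_{open,close}_rxq() entry points are from this series:

        /* Bind a memory provider to a leased rxq of the virtual netdev;
         * the queue restart is proxied to the real rxq of the physical
         * netdev.
         */
        struct pp_memory_provider_params p = {
                .mp_ops  = &my_mp_ops,   /* hypothetical provider ops */
                .mp_priv = my_mp_state,  /* hypothetical provider state */
        };
        int err;

        err = net_mp_open_rxq(virt_dev, rxq_idx, &p);
        if (!err) {
                /* ... zero-copy rx into DMA-mapped container memory ... */
                net_mp_close_rxq(virt_dev, rxq_idx, &p);
        }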

Signed-off-by: David Wei <dw@davidwei.uk>
Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20260115082603.219152-6-daniel@iogearbox.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
include/net/netdev_rx_queue.h
include/net/page_pool/memory_provider.h
net/core/netdev_rx_queue.c

diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index de04fdfdad72be36f029743da1d36ee5226adc53..508d11afaecbf0d64316640a9b83378bd262fa1a 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -73,4 +73,8 @@ enum netif_lease_dir {
 struct netdev_rx_queue *
 __netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq,
                           enum netif_lease_dir dir);
+struct netdev_rx_queue *
+netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq);
+void netif_put_rx_queue_lease_locked(struct net_device *orig_dev,
+                                    struct net_device *dev);
 #endif /* _LINUX_NETDEV_RX_QUEUE_H */
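
These two helpers bracket a queue operation as a pair: the get variant
may rewrite *dev and *rxq to the physical netdev backing the lease and
then also takes that device's instance lock, and the put variant drops
it again. A sketch of the contract, mirroring its use in
__net_mp_open_rxq() further below (identifiers illustrative):

        struct net_device *orig_dev = dev;  /* virtual netdev, lock held */
        struct netdev_rx_queue *rxq;

        rxq = netif_get_rx_queue_lease_locked(&dev, &rxq_idx);
        if (!rxq)
                return -EBUSY;  /* lease could not be resolved */
        /* ... operate on the real rxq, both instance locks held ... */
        netif_put_rx_queue_lease_locked(orig_dev, dev);
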
diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h
index ada4f968960aec851f8a4c66180d0811dede074a..b6f811c3416b6faa1f0173b93a00cb0920161b3c 100644
--- a/include/net/page_pool/memory_provider.h
+++ b/include/net/page_pool/memory_provider.h
@@ -23,12 +23,12 @@ bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr);
 void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov);
 void net_mp_niov_clear_page_pool(struct net_iov *niov);
 
-int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
+int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
                    struct pp_memory_provider_params *p);
 int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
                      const struct pp_memory_provider_params *p,
                      struct netlink_ext_ack *extack);
-void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
+void net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
                      struct pp_memory_provider_params *old_p);
 void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
                        const struct pp_memory_provider_params *old_p);
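
Beyond the parameter renames, the split between these wrappers and the
double-underscore variants is about lock ownership. A sketch of when to
use which, mirroring net_mp_close_rxq() at the end of this diff (p and
extack are placeholders):

        /* Instance lock not held: the wrapper takes and drops it. */
        err = net_mp_open_rxq(dev, rxq_idx, &p);

        /* Already under the instance lock (e.g. in a netlink doit
         * handler): call the __ variant directly, with extack for
         * error reporting.
         */
        netdev_ops_assert_locked(dev);
        err = __net_mp_open_rxq(dev, rxq_idx, &p, extack);
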
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index 61fe25817e98d3e2be28b4ecb78955d4f89df9c8..75c7a68cb90de016d24f389bc7395365d1bce963 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -67,6 +67,29 @@ __netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq_idx,
        return rxq;
 }
 
+struct netdev_rx_queue *
+netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq_idx)
+{
+       struct net_device *orig_dev = *dev;
+       struct netdev_rx_queue *rxq;
+
+       /* Locking order is always from the virtual to the physical device,
+        * see netdev_nl_queue_create_doit().
+        */
+       netdev_ops_assert_locked(orig_dev);
+       rxq = __netif_get_rx_queue_lease(dev, rxq_idx, NETIF_VIRT_TO_PHYS);
+       if (rxq && orig_dev != *dev)
+               netdev_lock(*dev);
+       return rxq;
+}
+
+void netif_put_rx_queue_lease_locked(struct net_device *orig_dev,
+                                    struct net_device *dev)
+{
+       if (orig_dev != dev)
+               netdev_unlock(dev);
+}
+
 bool netif_rx_queue_lease_get_owner(struct net_device **dev,
                                    unsigned int *rxq_idx)
 {
@@ -183,49 +206,63 @@ int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
                      const struct pp_memory_provider_params *p,
                      struct netlink_ext_ack *extack)
 {
+       struct net_device *orig_dev = dev;
        struct netdev_rx_queue *rxq;
        int ret;
 
        if (!netdev_need_ops_lock(dev))
                return -EOPNOTSUPP;
-
        if (rxq_idx >= dev->real_num_rx_queues) {
                NL_SET_ERR_MSG(extack, "rx queue index out of range");
                return -ERANGE;
        }
-       rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
 
+       rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
+       rxq = netif_get_rx_queue_lease_locked(&dev, &rxq_idx);
+       if (!rxq) {
+               NL_SET_ERR_MSG(extack, "rx queue peered to a virtual netdev");
+               return -EBUSY;
+       }
+       if (!dev->dev.parent) {
+               NL_SET_ERR_MSG(extack, "rx queue is mapped to a virtual netdev");
+               ret = -EBUSY;
+               goto out;
+       }
        if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
                NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out;
        }
        if (dev->cfg->hds_thresh) {
                NL_SET_ERR_MSG(extack, "hds-thresh is not zero");
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out;
        }
        if (dev_xdp_prog_count(dev)) {
                NL_SET_ERR_MSG(extack, "unable to custom memory provider to device with XDP program attached");
-               return -EEXIST;
+               ret = -EEXIST;
+               goto out;
        }
-
-       rxq = __netif_get_rx_queue(dev, rxq_idx);
        if (rxq->mp_params.mp_ops) {
                NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
-               return -EEXIST;
+               ret = -EEXIST;
+               goto out;
        }
 #ifdef CONFIG_XDP_SOCKETS
        if (rxq->pool) {
                NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
-               return -EBUSY;
+               ret = -EBUSY;
+               goto out;
        }
 #endif
-
        rxq->mp_params = *p;
        ret = netdev_rx_queue_restart(dev, rxq_idx);
        if (ret) {
                rxq->mp_params.mp_ops = NULL;
                rxq->mp_params.mp_priv = NULL;
        }
+out:
+       netif_put_rx_queue_lease_locked(orig_dev, dev);
        return ret;
 }
 
@@ -240,38 +277,43 @@ int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
        return ret;
 }
 
-void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
+void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
                        const struct pp_memory_provider_params *old_p)
 {
+       struct net_device *orig_dev = dev;
        struct netdev_rx_queue *rxq;
        int err;
 
-       if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
+       if (WARN_ON_ONCE(rxq_idx >= dev->real_num_rx_queues))
                return;
 
-       rxq = __netif_get_rx_queue(dev, ifq_idx);
+       rxq = netif_get_rx_queue_lease_locked(&dev, &rxq_idx);
+       if (WARN_ON_ONCE(!rxq))
+               return;
 
        /* Callers holding a netdev ref may get here after we already
         * went thru shutdown via dev_memory_provider_uninstall().
         */
        if (dev->reg_state > NETREG_REGISTERED &&
            !rxq->mp_params.mp_ops)
-               return;
+               goto out;
 
        if (WARN_ON_ONCE(rxq->mp_params.mp_ops != old_p->mp_ops ||
                         rxq->mp_params.mp_priv != old_p->mp_priv))
-               return;
+               goto out;
 
        rxq->mp_params.mp_ops = NULL;
        rxq->mp_params.mp_priv = NULL;
-       err = netdev_rx_queue_restart(dev, ifq_idx);
+       err = netdev_rx_queue_restart(dev, rxq_idx);
        WARN_ON(err && err != -ENETDOWN);
+out:
+       netif_put_rx_queue_lease_locked(orig_dev, dev);
 }
 
-void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
+void net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
                      struct pp_memory_provider_params *old_p)
 {
        netdev_lock(dev);
-       __net_mp_close_rxq(dev, ifq_idx, old_p);
+       __net_mp_close_rxq(dev, rxq_idx, old_p);
        netdev_unlock(dev);
 }