]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
net: Proxy netif_mp_{open,close}_rxq for leased queues
authorDavid Wei <dw@davidwei.uk>
Thu, 2 Apr 2026 23:10:23 +0000 (01:10 +0200)
committerJakub Kicinski <kuba@kernel.org>
Fri, 10 Apr 2026 01:21:46 +0000 (18:21 -0700)
When a process in a container wants to set up a memory provider, it will
use the virtual netdev and a leased rxq, and call netif_mp_{open,close}_rxq
to try to restart the queue. At this point, proxy the queue restart on
the real rxq in the physical netdev.

For memory providers (io_uring zero-copy rx and devmem), this causes the
real rxq in the physical netdev to be filled from a memory provider that
has DMA-mapped memory from a process within a container.

Signed-off-by: David Wei <dw@davidwei.uk>
Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20260402231031.447597-7-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/core/dev.c
net/core/dev.h
net/core/netdev_rx_queue.c

index cc7bcac892af0cf9bbcc19b5010e7f6d92f9f656..2df8a2a5ecf5b9f56810b9206eed5a7d9b7d4730 100644 (file)
@@ -12350,10 +12350,8 @@ static void dev_memory_provider_uninstall(struct net_device *dev)
 
        for (i = 0; i < dev->real_num_rx_queues; i++) {
                struct netdev_rx_queue *rxq = &dev->_rx[i];
-               struct pp_memory_provider_params *p = &rxq->mp_params;
 
-               if (p->mp_ops && p->mp_ops->uninstall)
-                       p->mp_ops->uninstall(rxq->mp_params.mp_priv, rxq);
+               __netif_mp_uninstall_rxq(rxq, &rxq->mp_params);
        }
 }
 
index 6516ce2b5517b1bbb2755e31f4eee50f7bcd7bf7..95edb2d4eff8b88bd36186318723ce946015f00a 100644 (file)
@@ -12,6 +12,7 @@ struct net;
 struct netlink_ext_ack;
 struct netdev_queue_config;
 struct cpumask;
+struct pp_memory_provider_params;
 
 /* Random bits of netdevice that don't need to be exposed */
 #define FLOW_LIMIT_HISTORY     (1 << 7)  /* must be ^2 and !overflow buckets */
@@ -101,6 +102,12 @@ int netdev_queue_config_validate(struct net_device *dev, int rxq_idx,
 bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx);
 bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx);
 
+void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq,
+                             const struct pp_memory_provider_params *p);
+
+void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq,
+                              struct netdev_rx_queue *virt_rxq);
+
 /* netdev management, shared between various uAPI entry points */
 struct netdev_name_node {
        struct hlist_node hlist;
index 06ac3bd5507fa08ead6ce85ae9f148cd58fd7bfc..1d6e7e47bf0a775a1fbb8413ecde1ac287ab26c6 100644 (file)
@@ -28,6 +28,8 @@ void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst,
        netdev_assert_locked(rxq_dst->dev);
        netdev_assert_locked(rxq_src->dev);
 
+       netif_rxq_cleanup_unlease(rxq_src, rxq_dst);
+
        WRITE_ONCE(rxq_src->lease, NULL);
        WRITE_ONCE(rxq_dst->lease, NULL);
 
@@ -200,24 +202,15 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
 }
 EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL");
 
-int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
-                     const struct pp_memory_provider_params *p,
-                     struct netlink_ext_ack *extack)
+static int __netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
+                              const struct pp_memory_provider_params *p,
+                              struct netlink_ext_ack *extack)
 {
        const struct netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops;
        struct netdev_queue_config qcfg[2];
        struct netdev_rx_queue *rxq;
        int ret;
 
-       if (!netdev_need_ops_lock(dev))
-               return -EOPNOTSUPP;
-
-       if (rxq_idx >= dev->real_num_rx_queues) {
-               NL_SET_ERR_MSG(extack, "rx queue index out of range");
-               return -ERANGE;
-       }
-       rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
-
        if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
                NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
                return -EINVAL;
@@ -264,16 +257,48 @@ err_clear_mp:
        return ret;
 }
 
-void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
-                       const struct pp_memory_provider_params *old_p)
+int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
+                     const struct pp_memory_provider_params *p,
+                     struct netlink_ext_ack *extack)
+{
+       struct net_device *orig_dev = dev;
+       int ret;
+
+       if (!netdev_need_ops_lock(dev))
+               return -EOPNOTSUPP;
+
+       if (rxq_idx >= dev->real_num_rx_queues) {
+               NL_SET_ERR_MSG(extack, "rx queue index out of range");
+               return -ERANGE;
+       }
+       rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
+
+       if (!netif_rxq_is_leased(dev, rxq_idx))
+               return __netif_mp_open_rxq(dev, rxq_idx, p, extack);
+
+       if (!netif_get_rx_queue_lease_locked(&dev, &rxq_idx)) {
+               NL_SET_ERR_MSG(extack, "rx queue leased to a virtual netdev");
+               return -EBUSY;
+       }
+       if (!dev->dev.parent) {
+               NL_SET_ERR_MSG(extack, "rx queue belongs to a virtual netdev");
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
+
+       ret = __netif_mp_open_rxq(dev, rxq_idx, p, extack);
+out:
+       netif_put_rx_queue_lease_locked(orig_dev, dev);
+       return ret;
+}
+
+static void __netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
+                                const struct pp_memory_provider_params *old_p)
 {
        struct netdev_queue_config qcfg[2];
        struct netdev_rx_queue *rxq;
        int err;
 
-       if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
-               return;
-
        rxq = __netif_get_rx_queue(dev, ifq_idx);
 
        /* Callers holding a netdev ref may get here after we already
@@ -294,3 +319,48 @@ void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
        err = netdev_rx_queue_reconfig(dev, ifq_idx, &qcfg[0], &qcfg[1]);
        WARN_ON(err && err != -ENETDOWN);
 }
+
+void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
+                       const struct pp_memory_provider_params *old_p)
+{
+       struct net_device *orig_dev = dev;
+
+       if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
+               return;
+       if (!netif_rxq_is_leased(dev, ifq_idx))
+               return __netif_mp_close_rxq(dev, ifq_idx, old_p);
+
+       if (WARN_ON_ONCE(!netif_get_rx_queue_lease_locked(&dev, &ifq_idx)))
+               return;
+
+       __netif_mp_close_rxq(dev, ifq_idx, old_p);
+       netif_put_rx_queue_lease_locked(orig_dev, dev);
+}
+
+void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq,
+                             const struct pp_memory_provider_params *p)
+{
+       if (p->mp_ops && p->mp_ops->uninstall)
+               p->mp_ops->uninstall(p->mp_priv, rxq);
+}
+
+/* Clean up memory provider state when a queue lease is torn down. If
+ * a memory provider was installed on the physical queue via the lease,
+ * close it now. The memory provider is a property of the queue itself,
+ * and it was _guaranteed_ to be installed on the physical queue via
+ * the lease redirection. The extra __netif_mp_close_rxq is needed
+ * since the physical queue can outlive the virtual queue in the lease
+ * case, so it needs to be reconfigured to clear the memory provider.
+ */
+void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq,
+                              struct netdev_rx_queue *virt_rxq)
+{
+       struct pp_memory_provider_params *p = &phys_rxq->mp_params;
+       unsigned int ifq_idx = get_netdev_rx_queue_index(phys_rxq);
+
+       if (!p->mp_ops)
+               return;
+
+       __netif_mp_uninstall_rxq(virt_rxq, p);
+       __netif_mp_close_rxq(phys_rxq->dev, ifq_idx, p);
+}