From 8766d61a1d33cb5f15bfdd6ce9832bbe1fc649c2 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Tue, 20 Jan 2026 18:04:55 -0800
Subject: [PATCH] Revert "Merge branch 'netkit-support-for-io_uring-zero-copy-and-af_xdp'"

This reverts commit 77b9c4a438fc66e2ab004c411056b3fb71a54f2c, reversing
changes made to 4515ec4ad58a37e70a9e1256c0b993958c9b7497:

  931420a2fc36 ("selftests/net: Add netkit container tests")
  ab771c938d9a ("selftests/net: Make NetDrvContEnv support queue leasing")
  6be87fbb2776 ("selftests/net: Add env for container based tests")
  61d99ce3dfc2 ("selftests/net: Add bpf skb forwarding program")
  920da3634194 ("netkit: Add xsk support for af_xdp applications")
  eef51113f8af ("netkit: Add netkit notifier to check for unregistering devices")
  b5ef109d22d4 ("netkit: Implement rtnl_link_ops->alloc and ndo_queue_create")
  b5c3fa4a0b16 ("netkit: Add single device mode for netkit")
  0073d2fd679d ("xsk: Proxy pool management for leased queues")
  1ecea95dd3b5 ("xsk: Extend xsk_rcv_check validation")
  804bf334d08a ("net: Proxy netdev_queue_get_dma_dev for leased queues")
  0caa9a8ddec3 ("net: Proxy net_mp_{open,close}_rxq for leased queues")
  ff8889ff9107 ("net, ethtool: Disallow leased real rxqs to be resized")
  9e2103f36110 ("net: Add lease info to queue-get response")
  31127deddef4 ("net: Implement netdev_nl_queue_create_doit")
  a5546e18f77c ("net: Add queue-create operation")

The series will conflict with io_uring work, and the code needs more polish.

Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 44 --- drivers/net/netkit.c | 360 +++--------------- include/linux/netdevice.h | 6 - include/net/netdev_queues.h | 19 +- include/net/netdev_rx_queue.h | 21 +- include/net/page_pool/memory_provider.h | 4 +- include/net/xdp_sock_drv.h | 2 +- include/uapi/linux/if_link.h | 6 - include/uapi/linux/netdev.h | 11 - net/core/dev.c | 7 - net/core/dev.h | 2 - net/core/netdev-genl-gen.c | 20 - net/core/netdev-genl-gen.h | 2 - net/core/netdev-genl.c | 185 --------- net/core/netdev_queues.c | 74 +--- net/core/netdev_rx_queue.c | 169 ++------ net/ethtool/channels.c | 12 +- net/ethtool/ioctl.c | 9 +- net/xdp/xsk.c | 79 +--- tools/include/uapi/linux/netdev.h | 11 - .../testing/selftests/drivers/net/README.rst | 7 - .../testing/selftests/drivers/net/hw/Makefile | 2 - .../drivers/net/hw/lib/py/__init__.py | 7 +- .../selftests/drivers/net/hw/nk_forward.bpf.c | 49 --- .../selftests/drivers/net/hw/nk_netns.py | 23 -- .../selftests/drivers/net/hw/nk_qlease.py | 55 --- .../selftests/drivers/net/lib/py/__init__.py | 7 +- .../selftests/drivers/net/lib/py/env.py | 157 -------- 28 files changed, 117 insertions(+), 1233 deletions(-) delete mode 100644 tools/testing/selftests/drivers/net/hw/nk_forward.bpf.c delete mode 100755 tools/testing/selftests/drivers/net/hw/nk_netns.py delete mode 100755 tools/testing/selftests/drivers/net/hw/nk_qlease.py diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index b86db8656eacd..596c306ce52b8 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -339,15 +339,6 @@ attribute-sets: doc: XSK information for this queue, if any. type: nest nested-attributes: xsk-info - - - name: lease - doc: | - A queue from a virtual device can have a lease which refers to - another queue from a physical device. 
This is useful for memory - providers and AF_XDP operations which take an ifindex and queue id - to allow applications to bind against virtual devices in containers. - type: nest - nested-attributes: lease - name: qstats doc: | @@ -546,24 +537,6 @@ attribute-sets: name: id - name: type - - - name: lease - attributes: - - - name: ifindex - doc: The netdev ifindex to lease the queue from. - type: u32 - checks: - min: 1 - - - name: queue - doc: The netdev queue to lease from. - type: nest - nested-attributes: queue-id - - - name: netns-id - doc: The network namespace id of the netdev. - type: s32 - name: dmabuf attributes: @@ -713,7 +686,6 @@ operations: - dmabuf - io-uring - xsk - - lease dump: request: attributes: @@ -825,22 +797,6 @@ operations: reply: attributes: - id - - - name: queue-create - doc: | - Create a new queue for the given netdevice. Whether this operation - is supported depends on the device and the driver. - attribute-set: queue - flags: [admin-perm] - do: - request: - attributes: - - ifindex - - type - - lease - reply: &queue-create-op - attributes: - - id kernel-family: headers: ["net/netdev_netlink.h"] diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 0519f855d062c..0a2fef7caccb9 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -9,21 +9,11 @@ #include #include -#include -#include -#include -#include #include #include #include -#define NETKIT_DRV_NAME "netkit" - -#define NETKIT_NUM_RX_QUEUES_MAX 1024 -#define NETKIT_NUM_TX_QUEUES_MAX 1 - -#define NETKIT_NUM_RX_QUEUES_REAL 1 -#define NETKIT_NUM_TX_QUEUES_REAL 1 +#define DRV_NAME "netkit" struct netkit { __cacheline_group_begin(netkit_fastpath); @@ -36,7 +26,6 @@ struct netkit { __cacheline_group_begin(netkit_slowpath); enum netkit_mode mode; - enum netkit_pairing pair; bool primary; u32 headroom; __cacheline_group_end(netkit_slowpath); @@ -47,8 +36,6 @@ struct netkit_link { struct net_device *dev; }; -static struct rtnl_link_ops netkit_link_ops; - static __always_inline 
int netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb, enum netkit_action ret) @@ -148,10 +135,6 @@ static int netkit_open(struct net_device *dev) struct netkit *nk = netkit_priv(dev); struct net_device *peer = rtnl_dereference(nk->peer); - if (nk->pair == NETKIT_DEVICE_SINGLE) { - netif_carrier_on(dev); - return 0; - } if (!peer) return -ENOTCONN; if (peer->flags & IFF_UP) { @@ -236,86 +219,9 @@ static void netkit_get_stats(struct net_device *dev, stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped); } -static bool netkit_xsk_supported_at_phys(const struct net_device *dev) -{ - if (!dev->netdev_ops->ndo_bpf || - !dev->netdev_ops->ndo_xdp_xmit || - !dev->netdev_ops->ndo_xsk_wakeup) - return false; - if ((dev->xdp_features & NETDEV_XDP_ACT_XSK) != NETDEV_XDP_ACT_XSK) - return false; - return true; -} - -static int netkit_xsk(struct net_device *dev, struct netdev_bpf *xdp) -{ - struct netkit *nk = netkit_priv(dev); - struct netdev_bpf xdp_lower; - struct netdev_rx_queue *rxq; - struct net_device *phys; - int ret = -EBUSY; - - switch (xdp->command) { - case XDP_SETUP_XSK_POOL: - if (nk->pair == NETKIT_DEVICE_PAIR) - return -EOPNOTSUPP; - if (xdp->xsk.queue_id >= dev->real_num_rx_queues) - return -EINVAL; - - rxq = __netif_get_rx_queue(dev, xdp->xsk.queue_id); - if (!rxq->lease) - return -EOPNOTSUPP; - - phys = rxq->lease->dev; - if (!netkit_xsk_supported_at_phys(phys)) - return -EOPNOTSUPP; - - memcpy(&xdp_lower, xdp, sizeof(xdp_lower)); - xdp_lower.xsk.queue_id = get_netdev_rx_queue_index(rxq->lease); - break; - case XDP_SETUP_PROG: - return -EPERM; - default: - return -EINVAL; - } - - netdev_lock(phys); - if (!dev_get_min_mp_channel_count(phys)) - ret = phys->netdev_ops->ndo_bpf(phys, &xdp_lower); - netdev_unlock(phys); - return ret; -} - -static int netkit_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) -{ - struct netdev_rx_queue *rxq; - struct net_device *phys; - - if (queue_id >= dev->real_num_rx_queues) - return -EINVAL; - - rxq = 
__netif_get_rx_queue(dev, queue_id); - if (!rxq->lease) - return -EOPNOTSUPP; - - phys = rxq->lease->dev; - if (!netkit_xsk_supported_at_phys(phys)) - return -EOPNOTSUPP; - - return phys->netdev_ops->ndo_xsk_wakeup(phys, - get_netdev_rx_queue_index(rxq->lease), flags); -} - -static int netkit_init(struct net_device *dev) -{ - netdev_lockdep_set_classes(dev); - return 0; -} - static void netkit_uninit(struct net_device *dev); static const struct net_device_ops netkit_netdev_ops = { - .ndo_init = netkit_init, .ndo_open = netkit_open, .ndo_stop = netkit_close, .ndo_start_xmit = netkit_xmit, @@ -326,95 +232,19 @@ static const struct net_device_ops netkit_netdev_ops = { .ndo_get_peer_dev = netkit_peer_dev, .ndo_get_stats64 = netkit_get_stats, .ndo_uninit = netkit_uninit, - .ndo_bpf = netkit_xsk, - .ndo_xsk_wakeup = netkit_xsk_wakeup, .ndo_features_check = passthru_features_check, }; static void netkit_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strscpy(info->driver, NETKIT_DRV_NAME, sizeof(info->driver)); + strscpy(info->driver, DRV_NAME, sizeof(info->driver)); } static const struct ethtool_ops netkit_ethtool_ops = { .get_drvinfo = netkit_get_drvinfo, }; -static int netkit_queue_create(struct net_device *dev) -{ - struct netkit *nk = netkit_priv(dev); - u32 rxq_count_old, rxq_count_new; - int err; - - rxq_count_old = dev->real_num_rx_queues; - rxq_count_new = rxq_count_old + 1; - - /* Only allow to lease a queue in single device mode or to - * lease against the peer device which then ends up in the - * target netns. - */ - if (nk->pair == NETKIT_DEVICE_PAIR && nk->primary) - return -EOPNOTSUPP; - - if (netif_running(dev)) - netif_carrier_off(dev); - err = netif_set_real_num_rx_queues(dev, rxq_count_new); - if (netif_running(dev)) - netif_carrier_on(dev); - - return err ? 
: rxq_count_old; -} - -static const struct netdev_queue_mgmt_ops netkit_queue_mgmt_ops = { - .ndo_queue_create = netkit_queue_create, -}; - -static struct net_device *netkit_alloc(struct nlattr *tb[], - const char *ifname, - unsigned char name_assign_type, - unsigned int num_tx_queues, - unsigned int num_rx_queues) -{ - const struct rtnl_link_ops *ops = &netkit_link_ops; - struct net_device *dev; - - if (num_tx_queues > NETKIT_NUM_TX_QUEUES_MAX || - num_rx_queues > NETKIT_NUM_RX_QUEUES_MAX) - return ERR_PTR(-EOPNOTSUPP); - - dev = alloc_netdev_mqs(ops->priv_size, ifname, - name_assign_type, ops->setup, - num_tx_queues, num_rx_queues); - if (dev) { - dev->real_num_tx_queues = NETKIT_NUM_TX_QUEUES_REAL; - dev->real_num_rx_queues = NETKIT_NUM_RX_QUEUES_REAL; - } - return dev; -} - -static void netkit_queue_unlease(struct net_device *dev) -{ - struct netdev_rx_queue *rxq, *rxq_lease; - struct net_device *dev_lease; - int i; - - if (dev->real_num_rx_queues == 1) - return; - - netdev_lock(dev); - for (i = 1; i < dev->real_num_rx_queues; i++) { - rxq = __netif_get_rx_queue(dev, i); - rxq_lease = rxq->lease; - dev_lease = rxq_lease->dev; - - netdev_lock(dev_lease); - netdev_rx_queue_unlease(rxq, rxq_lease); - netdev_unlock(dev_lease); - } - netdev_unlock(dev); -} - static void netkit_setup(struct net_device *dev) { static const netdev_features_t netkit_features_hw_vlan = @@ -445,20 +275,18 @@ static void netkit_setup(struct net_device *dev) dev->priv_flags |= IFF_DISABLE_NETPOLL; dev->lltx = true; - dev->netdev_ops = &netkit_netdev_ops; - dev->ethtool_ops = &netkit_ethtool_ops; - dev->queue_mgmt_ops = &netkit_queue_mgmt_ops; + dev->ethtool_ops = &netkit_ethtool_ops; + dev->netdev_ops = &netkit_netdev_ops; dev->features |= netkit_features; dev->hw_features = netkit_features; dev->hw_enc_features = netkit_features; dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; dev->vlan_features = dev->features & ~netkit_features_hw_vlan; + dev->needs_free_netdev = true; 
netif_set_tso_max_size(dev, GSO_MAX_SIZE); - - xdp_set_features_flag(dev, NETDEV_XDP_ACT_XSK); } static struct net *netkit_get_link_net(const struct net_device *dev) @@ -497,6 +325,8 @@ static int netkit_validate(struct nlattr *tb[], struct nlattr *data[], return 0; } +static struct rtnl_link_ops netkit_link_ops; + static int netkit_new_link(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) @@ -505,7 +335,6 @@ static int netkit_new_link(struct net_device *dev, enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT; enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT; struct nlattr *peer_tb[IFLA_MAX + 1], **tbp, *attr; - enum netkit_pairing pair = NETKIT_DEVICE_PAIR; enum netkit_action policy_prim = NETKIT_PASS; enum netkit_action policy_peer = NETKIT_PASS; struct nlattr **data = params->data; @@ -514,8 +343,7 @@ static int netkit_new_link(struct net_device *dev, struct nlattr **tb = params->tb; u16 headroom = 0, tailroom = 0; struct ifinfomsg *ifmp = NULL; - struct net_device *peer = NULL; - bool seen_peer = false; + struct net_device *peer; char ifname[IFNAMSIZ]; struct netkit *nk; int err; @@ -552,12 +380,6 @@ static int netkit_new_link(struct net_device *dev, headroom = nla_get_u16(data[IFLA_NETKIT_HEADROOM]); if (data[IFLA_NETKIT_TAILROOM]) tailroom = nla_get_u16(data[IFLA_NETKIT_TAILROOM]); - if (data[IFLA_NETKIT_PAIRING]) - pair = nla_get_u32(data[IFLA_NETKIT_PAIRING]); - - seen_peer = data[IFLA_NETKIT_PEER_INFO] || - data[IFLA_NETKIT_PEER_SCRUB] || - data[IFLA_NETKIT_PEER_POLICY]; } if (ifmp && tbp[IFLA_IFNAME]) { @@ -570,46 +392,45 @@ static int netkit_new_link(struct net_device *dev, if (mode != NETKIT_L2 && (tb[IFLA_ADDRESS] || tbp[IFLA_ADDRESS])) return -EOPNOTSUPP; - if (pair == NETKIT_DEVICE_SINGLE && - (tb != tbp || seen_peer || policy_prim != NETKIT_PASS)) - return -EOPNOTSUPP; - if (pair == NETKIT_DEVICE_PAIR) { - peer = rtnl_create_link(peer_net, ifname, ifname_assign_type, - &netkit_link_ops, tbp, extack); - 
if (IS_ERR(peer)) - return PTR_ERR(peer); - - netif_inherit_tso_max(peer, dev); - if (headroom) - peer->needed_headroom = headroom; - if (tailroom) - peer->needed_tailroom = tailroom; - if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS])) - eth_hw_addr_random(peer); - if (ifmp && dev->ifindex) - peer->ifindex = ifmp->ifi_index; + peer = rtnl_create_link(peer_net, ifname, ifname_assign_type, + &netkit_link_ops, tbp, extack); + if (IS_ERR(peer)) + return PTR_ERR(peer); - nk = netkit_priv(peer); - nk->primary = false; - nk->policy = policy_peer; - nk->scrub = scrub_peer; - nk->mode = mode; - nk->pair = pair; - nk->headroom = headroom; - bpf_mprog_bundle_init(&nk->bundle); - - err = register_netdevice(peer); - if (err < 0) - goto err_register_peer; - netif_carrier_off(peer); - if (mode == NETKIT_L2) - dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL); - - err = rtnl_configure_link(peer, NULL, 0, NULL); - if (err < 0) - goto err_configure_peer; + netif_inherit_tso_max(peer, dev); + if (headroom) { + peer->needed_headroom = headroom; + dev->needed_headroom = headroom; } + if (tailroom) { + peer->needed_tailroom = tailroom; + dev->needed_tailroom = tailroom; + } + + if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS])) + eth_hw_addr_random(peer); + if (ifmp && dev->ifindex) + peer->ifindex = ifmp->ifi_index; + + nk = netkit_priv(peer); + nk->primary = false; + nk->policy = policy_peer; + nk->scrub = scrub_peer; + nk->mode = mode; + nk->headroom = headroom; + bpf_mprog_bundle_init(&nk->bundle); + + err = register_netdevice(peer); + if (err < 0) + goto err_register_peer; + netif_carrier_off(peer); + if (mode == NETKIT_L2) + dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL); + + err = rtnl_configure_link(peer, NULL, 0, NULL); + if (err < 0) + goto err_configure_peer; if (mode == NETKIT_L2 && !tb[IFLA_ADDRESS]) eth_hw_addr_random(dev); @@ -617,17 +438,12 @@ static int netkit_new_link(struct net_device *dev, nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); else 
strscpy(dev->name, "nk%d", IFNAMSIZ); - if (headroom) - dev->needed_headroom = headroom; - if (tailroom) - dev->needed_tailroom = tailroom; nk = netkit_priv(dev); nk->primary = true; nk->policy = policy_prim; nk->scrub = scrub_prim; nk->mode = mode; - nk->pair = pair; nk->headroom = headroom; bpf_mprog_bundle_init(&nk->bundle); @@ -639,12 +455,10 @@ static int netkit_new_link(struct net_device *dev, dev_change_flags(dev, dev->flags & ~IFF_NOARP, NULL); rcu_assign_pointer(netkit_priv(dev)->peer, peer); - if (peer) - rcu_assign_pointer(netkit_priv(peer)->peer, dev); + rcu_assign_pointer(netkit_priv(peer)->peer, dev); return 0; err_configure_peer: - if (peer) - unregister_netdevice(peer); + unregister_netdevice(peer); return err; err_register_peer: free_netdev(peer); @@ -704,8 +518,6 @@ static struct net_device *netkit_dev_fetch(struct net *net, u32 ifindex, u32 whi nk = netkit_priv(dev); if (!nk->primary) return ERR_PTR(-EACCES); - if (nk->pair == NETKIT_DEVICE_SINGLE) - return ERR_PTR(-EOPNOTSUPP); if (which == BPF_NETKIT_PEER) { dev = rcu_dereference_rtnl(nk->peer); if (!dev) @@ -1032,7 +844,6 @@ static void netkit_release_all(struct net_device *dev) static void netkit_uninit(struct net_device *dev) { netkit_release_all(dev); - netkit_queue_unlease(dev); } static void netkit_del_link(struct net_device *dev, struct list_head *head) @@ -1068,7 +879,6 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[], { IFLA_NETKIT_PEER_INFO, "peer info" }, { IFLA_NETKIT_HEADROOM, "headroom" }, { IFLA_NETKIT_TAILROOM, "tailroom" }, - { IFLA_NETKIT_PAIRING, "pairing" }, }; if (!nk->primary) { @@ -1088,11 +898,9 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[], } if (data[IFLA_NETKIT_POLICY]) { - err = -EOPNOTSUPP; attr = data[IFLA_NETKIT_POLICY]; policy = nla_get_u32(attr); - if (nk->pair == NETKIT_DEVICE_PAIR) - err = netkit_check_policy(policy, attr, extack); + err = netkit_check_policy(policy, attr, extack); if (err) return err; 
WRITE_ONCE(nk->policy, policy); @@ -1113,48 +921,6 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[], return 0; } -static void netkit_check_lease_unregister(struct net_device *dev) -{ - LIST_HEAD(list_kill); - u32 q_idx; - - if (READ_ONCE(dev->reg_state) != NETREG_UNREGISTERING || - !dev->dev.parent) - return; - - netdev_lock_ops(dev); - for (q_idx = 0; q_idx < dev->real_num_rx_queues; q_idx++) { - struct net_device *tmp = dev; - u32 tmp_q_idx = q_idx; - - if (netif_rx_queue_lease_get_owner(&tmp, &tmp_q_idx)) { - if (tmp->netdev_ops != &netkit_netdev_ops) - continue; - /* A single phys device can have multiple queues leased - * to one netkit device. We can only queue that netkit - * device once to the list_kill. Queues of that phys - * device can be leased with different individual netkit - * devices, hence we batch via list_kill. - */ - if (unregister_netdevice_queued(tmp)) - continue; - netkit_del_link(tmp, &list_kill); - } - } - netdev_unlock_ops(dev); - unregister_netdevice_many(&list_kill); -} - -static int netkit_notifier(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - if (event == NETDEV_UNREGISTER) - netkit_check_lease_unregister(dev); - return NOTIFY_DONE; -} - static size_t netkit_get_size(const struct net_device *dev) { return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */ @@ -1165,7 +931,6 @@ static size_t netkit_get_size(const struct net_device *dev) nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */ nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_HEADROOM */ nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_TAILROOM */ - nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PAIRING */ 0; } @@ -1186,8 +951,6 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev) return -EMSGSIZE; if (nla_put_u16(skb, IFLA_NETKIT_TAILROOM, dev->needed_tailroom)) return -EMSGSIZE; - if (nla_put_u32(skb, IFLA_NETKIT_PAIRING, nk->pair)) - 
return -EMSGSIZE; if (peer) { nk = netkit_priv(peer); @@ -1209,15 +972,13 @@ static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = { [IFLA_NETKIT_TAILROOM] = { .type = NLA_U16 }, [IFLA_NETKIT_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), [IFLA_NETKIT_PEER_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), - [IFLA_NETKIT_PAIRING] = NLA_POLICY_MAX(NLA_U32, NETKIT_DEVICE_SINGLE), [IFLA_NETKIT_PRIMARY] = { .type = NLA_REJECT, .reject_message = "Primary attribute is read-only" }, }; static struct rtnl_link_ops netkit_link_ops = { - .kind = NETKIT_DRV_NAME, + .kind = DRV_NAME, .priv_size = sizeof(struct netkit), - .alloc = netkit_alloc, .setup = netkit_setup, .newlink = netkit_new_link, .dellink = netkit_del_link, @@ -1231,39 +992,26 @@ static struct rtnl_link_ops netkit_link_ops = { .maxtype = IFLA_NETKIT_MAX, }; -static struct notifier_block netkit_netdev_notifier = { - .notifier_call = netkit_notifier, -}; - -static __init int netkit_mod_init(void) +static __init int netkit_init(void) { - int ret; - BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT || (int)NETKIT_PASS != (int)TCX_PASS || (int)NETKIT_DROP != (int)TCX_DROP || (int)NETKIT_REDIRECT != (int)TCX_REDIRECT); - ret = rtnl_link_register(&netkit_link_ops); - if (ret) - return ret; - ret = register_netdevice_notifier(&netkit_netdev_notifier); - if (ret) - rtnl_link_unregister(&netkit_link_ops); - return ret; + return rtnl_link_register(&netkit_link_ops); } -static __exit void netkit_mod_exit(void) +static __exit void netkit_exit(void) { - unregister_netdevice_notifier(&netkit_netdev_notifier); rtnl_link_unregister(&netkit_link_ops); } -module_init(netkit_mod_init); -module_exit(netkit_mod_exit); +module_init(netkit_init); +module_exit(netkit_exit); MODULE_DESCRIPTION("BPF-programmable network device"); MODULE_AUTHOR("Daniel Borkmann "); MODULE_AUTHOR("Nikolay Aleksandrov "); MODULE_LICENSE("GPL"); -MODULE_ALIAS_RTNL_LINK(NETKIT_DRV_NAME); +MODULE_ALIAS_RTNL_LINK(DRV_NAME); diff --git 
a/include/linux/netdevice.h b/include/linux/netdevice.h index 4d146c000e21b..d99b0fbc1942a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3400,17 +3400,11 @@ static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); void unregister_netdevice_many(struct list_head *head); - static inline void unregister_netdevice(struct net_device *dev) { unregister_netdevice_queue(dev, NULL); } -static inline bool unregister_netdevice_queued(const struct net_device *dev) -{ - return !list_empty(&dev->unreg_list); -} - int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 81dc7cb2360c5..b55d3b9cb9c27 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -130,11 +130,6 @@ void netdev_stat_queue_sum(struct net_device *netdev, * @ndo_queue_get_dma_dev: Get dma device for zero-copy operations to be used * for this queue. Return NULL on error. * - * @ndo_queue_create: Create a new RX queue which can be leased to another queue. - * Ops on this queue are redirected to the leased queue e.g. - * when opening a memory provider. Return the new queue id on - * success. Return negative error code on failure. - * * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only * be called for an interface which is open. 
@@ -154,12 +149,9 @@ struct netdev_queue_mgmt_ops { int idx); struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev, int idx); - int (*ndo_queue_create)(struct net_device *dev); }; -bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx); -bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx); -bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx); +bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx); /** * DOC: Lockless queue stopping / waking helpers. @@ -348,10 +340,5 @@ static inline unsigned int netif_xmit_timeout_ms(struct netdev_queue *txq) }) struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx); -bool netdev_can_create_queue(const struct net_device *dev, - struct netlink_ext_ack *extack); -bool netdev_can_lease_queue(const struct net_device *dev, - struct netlink_ext_ack *extack); -bool netdev_queue_busy(struct net_device *dev, int idx, - struct netlink_ext_ack *extack); -#endif /* _LINUX_NET_QUEUES_H */ + +#endif diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index 508d11afaecbf..8cdcd138b33f2 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -28,8 +28,6 @@ struct netdev_rx_queue { #endif struct napi_struct *napi; struct pp_memory_provider_params mp_params; - struct netdev_rx_queue *lease; - netdevice_tracker lease_tracker; } ____cacheline_aligned_in_smp; /* @@ -59,22 +57,5 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue) } int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq); -void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst, - struct netdev_rx_queue *rxq_src); -void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst, - struct netdev_rx_queue *rxq_src); -bool netif_rx_queue_lease_get_owner(struct net_device **dev, unsigned int *rxq); -enum netif_lease_dir { - NETIF_VIRT_TO_PHYS, - NETIF_PHYS_TO_VIRT, -}; - -struct netdev_rx_queue * 
-__netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq, - enum netif_lease_dir dir); -struct netdev_rx_queue * -netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq); -void netif_put_rx_queue_lease_locked(struct net_device *orig_dev, - struct net_device *dev); -#endif /* _LINUX_NETDEV_RX_QUEUE_H */ +#endif diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h index b6f811c3416b6..ada4f968960ae 100644 --- a/include/net/page_pool/memory_provider.h +++ b/include/net/page_pool/memory_provider.h @@ -23,12 +23,12 @@ bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr); void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov); void net_mp_niov_clear_page_pool(struct net_iov *niov); -int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, +int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, struct pp_memory_provider_params *p); int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, const struct pp_memory_provider_params *p, struct netlink_ext_ack *extack); -void net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, +void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, struct pp_memory_provider_params *old_p); void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, const struct pp_memory_provider_params *old_p); diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index c07cfb431eac8..242e34f771cca 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -28,7 +28,7 @@ void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries); bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc); u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max); void xsk_tx_release(struct xsk_buff_pool *pool); -struct xsk_buff_pool *xsk_get_pool_from_qid(const struct net_device *dev, +struct xsk_buff_pool *xsk_get_pool_from_qid(struct 
net_device *dev, u16 queue_id); void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool); void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index bbd5657572988..3b491d96e52eb 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1296,11 +1296,6 @@ enum netkit_mode { NETKIT_L3, }; -enum netkit_pairing { - NETKIT_DEVICE_PAIR, - NETKIT_DEVICE_SINGLE, -}; - /* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to * the BPF program if attached. This also means the latter can * consume the two fields if they were populated earlier. @@ -1325,7 +1320,6 @@ enum { IFLA_NETKIT_PEER_SCRUB, IFLA_NETKIT_HEADROOM, IFLA_NETKIT_TAILROOM, - IFLA_NETKIT_PAIRING, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 7df1056a35fda..e0b579a1df4f2 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -160,7 +160,6 @@ enum { NETDEV_A_QUEUE_DMABUF, NETDEV_A_QUEUE_IO_URING, NETDEV_A_QUEUE_XSK, - NETDEV_A_QUEUE_LEASE, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) @@ -203,15 +202,6 @@ enum { NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) }; -enum { - NETDEV_A_LEASE_IFINDEX = 1, - NETDEV_A_LEASE_QUEUE, - NETDEV_A_LEASE_NETNS_ID, - - __NETDEV_A_LEASE_MAX, - NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1) -}; - enum { NETDEV_A_DMABUF_IFINDEX = 1, NETDEV_A_DMABUF_QUEUES, @@ -238,7 +228,6 @@ enum { NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, NETDEV_CMD_BIND_TX, - NETDEV_CMD_QUEUE_CREATE, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/net/core/dev.c b/net/core/dev.c index 13a3de63a8254..2661b68f5be3b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1114,13 +1114,6 @@ netdev_get_by_index_lock_ops_compat(struct net *net, int ifindex) return __netdev_put_lock_ops_compat(dev, net); } -struct net_device * 
-netdev_put_lock(struct net_device *dev, netdevice_tracker *tracker) -{ - netdev_tracker_free(dev, tracker); - return __netdev_put_lock(dev, dev_net(dev)); -} - struct net_device * netdev_xa_find_lock(struct net *net, struct net_device *dev, unsigned long *index) diff --git a/net/core/dev.h b/net/core/dev.h index 9bcb76b325d01..da18536cbd357 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -30,8 +30,6 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id); struct net_device *dev_get_by_napi_id(unsigned int napi_id); struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net); -struct net_device *netdev_put_lock(struct net_device *dev, - netdevice_tracker *tracker); struct net_device * netdev_xa_find_lock(struct net *net, struct net_device *dev, unsigned long *index); diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 52ba99c019e71..ba673e81716f6 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -28,12 +28,6 @@ static const struct netlink_range_validation netdev_a_napi_defer_hard_irqs_range }; /* Common nested types */ -const struct nla_policy netdev_lease_nl_policy[NETDEV_A_LEASE_NETNS_ID + 1] = { - [NETDEV_A_LEASE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), - [NETDEV_A_LEASE_QUEUE] = NLA_POLICY_NESTED(netdev_queue_id_nl_policy), - [NETDEV_A_LEASE_NETNS_ID] = { .type = NLA_S32, }, -}; - const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1] = { [NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range), [NETDEV_A_PAGE_POOL_IFINDEX] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range), @@ -113,13 +107,6 @@ static const struct nla_policy netdev_bind_tx_nl_policy[NETDEV_A_DMABUF_FD + 1] [NETDEV_A_DMABUF_FD] = { .type = NLA_U32, }, }; -/* NETDEV_CMD_QUEUE_CREATE - do */ -static const struct nla_policy netdev_queue_create_nl_policy[NETDEV_A_QUEUE_LEASE + 1] = { - [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), - 
[NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1), - [NETDEV_A_QUEUE_LEASE] = NLA_POLICY_NESTED(netdev_lease_nl_policy), -}; - /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -218,13 +205,6 @@ static const struct genl_split_ops netdev_nl_ops[] = { .maxattr = NETDEV_A_DMABUF_FD, .flags = GENL_CMD_CAP_DO, }, - { - .cmd = NETDEV_CMD_QUEUE_CREATE, - .doit = netdev_nl_queue_create_doit, - .policy = netdev_queue_create_nl_policy, - .maxattr = NETDEV_A_QUEUE_LEASE, - .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, - }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index d71b435d72c18..cffc08517a41f 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -14,7 +14,6 @@ #include /* Common nested types */ -extern const struct nla_policy netdev_lease_nl_policy[NETDEV_A_LEASE_NETNS_ID + 1]; extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1]; extern const struct nla_policy netdev_queue_id_nl_policy[NETDEV_A_QUEUE_TYPE + 1]; @@ -37,7 +36,6 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info); -int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 51c830f88f103..470fabbeacd9b 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -391,11 +391,8 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { struct pp_memory_provider_params *params; - struct net_device *orig_netdev = netdev; - struct nlattr *nest_lease, *nest_queue; struct netdev_rx_queue *rxq; struct netdev_queue 
*txq; - u32 lease_q_idx = q_idx; void *hdr; hdr = genlmsg_iput(rsp, info); @@ -413,37 +410,6 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, if (nla_put_napi_id(rsp, rxq->napi)) goto nla_put_failure; - if (netif_rx_queue_lease_get_owner(&netdev, &lease_q_idx)) { - struct net *net, *peer_net; - - nest_lease = nla_nest_start(rsp, NETDEV_A_QUEUE_LEASE); - if (!nest_lease) - goto nla_put_failure; - nest_queue = nla_nest_start(rsp, NETDEV_A_LEASE_QUEUE); - if (!nest_queue) - goto nla_put_failure; - if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, lease_q_idx)) - goto nla_put_failure; - if (nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type)) - goto nla_put_failure; - nla_nest_end(rsp, nest_queue); - if (nla_put_u32(rsp, NETDEV_A_LEASE_IFINDEX, - READ_ONCE(netdev->ifindex))) - goto nla_put_failure; - rcu_read_lock(); - peer_net = dev_net_rcu(netdev); - net = dev_net_rcu(orig_netdev); - if (!net_eq(net, peer_net)) { - s32 id = peernet2id_alloc(net, peer_net, GFP_ATOMIC); - - if (nla_put_s32(rsp, NETDEV_A_LEASE_NETNS_ID, id)) - goto nla_put_failure_unlock; - } - rcu_read_unlock(); - nla_nest_end(rsp, nest_lease); - netdev = orig_netdev; - } - params = &rxq->mp_params; if (params->mp_ops && params->mp_ops->nl_fill(params->mp_priv, rsp, rxq)) @@ -471,8 +437,6 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, return 0; -nla_put_failure_unlock: - rcu_read_unlock(); nla_put_failure: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; @@ -1156,155 +1120,6 @@ err_genlmsg_free: return err; } -int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info) -{ - const int qmaxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1; - const int lmaxtype = ARRAY_SIZE(netdev_lease_nl_policy) - 1; - int err, ifindex, ifindex_lease, queue_id, queue_id_lease; - struct nlattr *qtb[ARRAY_SIZE(netdev_queue_id_nl_policy)]; - struct nlattr *ltb[ARRAY_SIZE(netdev_lease_nl_policy)]; - struct netdev_rx_queue *rxq, *rxq_lease; - struct net_device *dev, 
*dev_lease; - netdevice_tracker dev_tracker; - struct nlattr *nest; - struct sk_buff *rsp; - void *hdr; - - if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX) || - GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) || - GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_LEASE)) - return -EINVAL; - if (nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]) != - NETDEV_QUEUE_TYPE_RX) { - NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QUEUE_TYPE]); - return -EINVAL; - } - - ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); - - nest = info->attrs[NETDEV_A_QUEUE_LEASE]; - err = nla_parse_nested(ltb, lmaxtype, nest, - netdev_lease_nl_policy, info->extack); - if (err < 0) - return err; - if (NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_IFINDEX) || - NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_QUEUE)) - return -EINVAL; - if (ltb[NETDEV_A_LEASE_NETNS_ID]) { - NL_SET_BAD_ATTR(info->extack, ltb[NETDEV_A_LEASE_NETNS_ID]); - return -EINVAL; - } - - ifindex_lease = nla_get_u32(ltb[NETDEV_A_LEASE_IFINDEX]); - - nest = ltb[NETDEV_A_LEASE_QUEUE]; - err = nla_parse_nested(qtb, qmaxtype, nest, - netdev_queue_id_nl_policy, info->extack); - if (err < 0) - return err; - if (NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_ID) || - NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_TYPE)) - return -EINVAL; - if (nla_get_u32(qtb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) { - NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_TYPE]); - return -EINVAL; - } - if (ifindex == ifindex_lease) { - NL_SET_ERR_MSG(info->extack, - "Lease ifindex cannot be the same as queue creation ifindex"); - return -EINVAL; - } - - queue_id_lease = nla_get_u32(qtb[NETDEV_A_QUEUE_ID]); - - rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!rsp) - return -ENOMEM; - - hdr = genlmsg_iput(rsp, info); - if (!hdr) { - err = -EMSGSIZE; - goto err_genlmsg_free; - } - - /* Locking order is always from the virtual to the physical device - * since this is also the same order 
when applications open the - * memory provider later on. - */ - dev = netdev_get_by_index_lock(genl_info_net(info), ifindex); - if (!dev) { - err = -ENODEV; - goto err_genlmsg_free; - } - if (!netdev_can_create_queue(dev, info->extack)) { - err = -EINVAL; - goto err_unlock_dev; - } - - dev_lease = netdev_get_by_index(genl_info_net(info), ifindex_lease, - &dev_tracker, GFP_KERNEL); - if (!dev_lease) { - err = -ENODEV; - goto err_unlock_dev; - } - if (!netdev_can_lease_queue(dev_lease, info->extack)) { - netdev_put(dev_lease, &dev_tracker); - err = -EINVAL; - goto err_unlock_dev; - } - - dev_lease = netdev_put_lock(dev_lease, &dev_tracker); - if (!dev_lease) { - err = -ENODEV; - goto err_unlock_dev; - } - if (queue_id_lease >= dev_lease->real_num_rx_queues) { - err = -ERANGE; - NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_ID]); - goto err_unlock_dev_lease; - } - if (netdev_queue_busy(dev_lease, queue_id_lease, info->extack)) { - err = -EBUSY; - goto err_unlock_dev_lease; - } - - rxq_lease = __netif_get_rx_queue(dev_lease, queue_id_lease); - rxq = __netif_get_rx_queue(dev, dev->real_num_rx_queues - 1); - - if (rxq->lease && rxq->lease->dev != dev_lease) { - err = -EOPNOTSUPP; - NL_SET_ERR_MSG(info->extack, - "Leasing multiple queues from different devices not supported"); - goto err_unlock_dev_lease; - } - - err = queue_id = dev->queue_mgmt_ops->ndo_queue_create(dev); - if (err < 0) { - NL_SET_ERR_MSG(info->extack, - "Device is unable to create a new queue"); - goto err_unlock_dev_lease; - } - - rxq = __netif_get_rx_queue(dev, queue_id); - netdev_rx_queue_lease(rxq, rxq_lease); - - nla_put_u32(rsp, NETDEV_A_QUEUE_ID, queue_id); - genlmsg_end(rsp, hdr); - - netdev_unlock(dev_lease); - netdev_unlock(dev); - - return genlmsg_reply(rsp, info); - -err_unlock_dev_lease: - netdev_unlock(dev_lease); -err_unlock_dev: - netdev_unlock(dev); -err_genlmsg_free: - nlmsg_free(rsp); - return err; -} - void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv) { 
INIT_LIST_HEAD(&priv->bindings); diff --git a/net/core/netdev_queues.c b/net/core/netdev_queues.c index 97acf64408292..251f27a8307f5 100644 --- a/net/core/netdev_queues.c +++ b/net/core/netdev_queues.c @@ -1,37 +1,22 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include -#include /** * netdev_queue_get_dma_dev() - get dma device for zero-copy operations * @dev: net_device * @idx: queue index * - * Get dma device for zero-copy operations to be used for this queue. If the - * queue is leased to a physical queue, we retrieve the latter's dma device. + * Get dma device for zero-copy operations to be used for this queue. * When such device is not available or valid, the function will return NULL. * * Return: Device or NULL on error */ struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx) { - const struct netdev_queue_mgmt_ops *queue_ops; + const struct netdev_queue_mgmt_ops *queue_ops = dev->queue_mgmt_ops; struct device *dma_dev; - if (idx < dev->real_num_rx_queues) { - struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, idx); - - if (rxq->lease) { - rxq = rxq->lease; - dev = rxq->dev; - idx = get_netdev_rx_queue_index(rxq); - } - } - - queue_ops = dev->queue_mgmt_ops; - if (queue_ops && queue_ops->ndo_queue_get_dma_dev) dma_dev = queue_ops->ndo_queue_get_dma_dev(dev, idx); else @@ -40,58 +25,3 @@ struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx) return dma_dev && dma_dev->dma_mask ? 
dma_dev : NULL; } -bool netdev_can_create_queue(const struct net_device *dev, - struct netlink_ext_ack *extack) -{ - if (dev->dev.parent) { - NL_SET_ERR_MSG(extack, "Device is not a virtual device"); - return false; - } - if (!dev->queue_mgmt_ops || - !dev->queue_mgmt_ops->ndo_queue_create) { - NL_SET_ERR_MSG(extack, "Device does not support queue creation"); - return false; - } - if (dev->real_num_rx_queues < 1 || - dev->real_num_tx_queues < 1) { - NL_SET_ERR_MSG(extack, "Device must have at least one real queue"); - return false; - } - return true; -} - -bool netdev_can_lease_queue(const struct net_device *dev, - struct netlink_ext_ack *extack) -{ - if (!dev->dev.parent) { - NL_SET_ERR_MSG(extack, "Lease device is a virtual device"); - return false; - } - if (!netif_device_present(dev)) { - NL_SET_ERR_MSG(extack, "Lease device has been removed from the system"); - return false; - } - if (!dev->queue_mgmt_ops) { - NL_SET_ERR_MSG(extack, "Lease device does not support queue management operations"); - return false; - } - return true; -} - -bool netdev_queue_busy(struct net_device *dev, int idx, - struct netlink_ext_ack *extack) -{ - if (netif_rxq_is_leased(dev, idx)) { - NL_SET_ERR_MSG(extack, "Lease device queue is already leased"); - return true; - } - if (xsk_get_pool_from_qid(dev, idx)) { - NL_SET_ERR_MSG(extack, "Lease device queue in use by AF_XDP"); - return true; - } - if (netif_rxq_has_mp(dev, idx)) { - NL_SET_ERR_MSG(extack, "Lease device queue in use by memory provider"); - return true; - } - return false; -} diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index 75c7a68cb90de..c7d9341b76307 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -9,120 +9,14 @@ #include "page_pool_priv.h" -void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst, - struct netdev_rx_queue *rxq_src) -{ - netdev_assert_locked(rxq_src->dev); - netdev_assert_locked(rxq_dst->dev); - - netdev_hold(rxq_src->dev, &rxq_src->lease_tracker, 
GFP_KERNEL); - - WRITE_ONCE(rxq_src->lease, rxq_dst); - WRITE_ONCE(rxq_dst->lease, rxq_src); -} - -void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst, - struct netdev_rx_queue *rxq_src) -{ - netdev_assert_locked(rxq_dst->dev); - netdev_assert_locked(rxq_src->dev); - - WRITE_ONCE(rxq_src->lease, NULL); - WRITE_ONCE(rxq_dst->lease, NULL); - - netdev_put(rxq_src->dev, &rxq_src->lease_tracker); -} - -bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx) -{ - if (rxq_idx < dev->real_num_rx_queues) - return READ_ONCE(__netif_get_rx_queue(dev, rxq_idx)->lease); - return false; -} - -static bool netif_lease_dir_ok(const struct net_device *dev, - enum netif_lease_dir dir) -{ - if (dir == NETIF_VIRT_TO_PHYS && !dev->dev.parent) - return true; - if (dir == NETIF_PHYS_TO_VIRT && dev->dev.parent) - return true; - return false; -} - -struct netdev_rx_queue * -__netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq_idx, - enum netif_lease_dir dir) -{ - struct net_device *orig_dev = *dev; - struct netdev_rx_queue *rxq = __netif_get_rx_queue(orig_dev, *rxq_idx); - - if (rxq->lease) { - if (!netif_lease_dir_ok(orig_dev, dir)) - return NULL; - rxq = rxq->lease; - *rxq_idx = get_netdev_rx_queue_index(rxq); - *dev = rxq->dev; - } - return rxq; -} - -struct netdev_rx_queue * -netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq_idx) -{ - struct net_device *orig_dev = *dev; - struct netdev_rx_queue *rxq; - - /* Locking order is always from the virtual to the physical device - * see netdev_nl_queue_create_doit(). 
- */ - netdev_ops_assert_locked(orig_dev); - rxq = __netif_get_rx_queue_lease(dev, rxq_idx, NETIF_VIRT_TO_PHYS); - if (rxq && orig_dev != *dev) - netdev_lock(*dev); - return rxq; -} - -void netif_put_rx_queue_lease_locked(struct net_device *orig_dev, - struct net_device *dev) -{ - if (orig_dev != dev) - netdev_unlock(dev); -} - -bool netif_rx_queue_lease_get_owner(struct net_device **dev, - unsigned int *rxq_idx) -{ - struct net_device *orig_dev = *dev; - struct netdev_rx_queue *rxq; - - /* The physical device needs to be locked. If there is indeed a lease, - * then the virtual device holds a reference on the physical device - * and the lease stays active until the virtual device is torn down. - * When queues get {un,}leased both devices are always locked. - */ - netdev_ops_assert_locked(orig_dev); - rxq = __netif_get_rx_queue_lease(dev, rxq_idx, NETIF_PHYS_TO_VIRT); - if (rxq && orig_dev != *dev) - return true; - return false; -} - /* See also page_pool_is_unreadable() */ -bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx) +bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx) { - if (rxq_idx < dev->real_num_rx_queues) - return __netif_get_rx_queue(dev, rxq_idx)->mp_params.mp_ops; - return false; -} -EXPORT_SYMBOL(netif_rxq_has_unreadable_mp); + struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, idx); -bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx) -{ - if (rxq_idx < dev->real_num_rx_queues) - return __netif_get_rx_queue(dev, rxq_idx)->mp_params.mp_priv; - return false; + return !!rxq->mp_params.mp_ops; } +EXPORT_SYMBOL(netif_rxq_has_unreadable_mp); int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) { @@ -206,63 +100,49 @@ int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, const struct pp_memory_provider_params *p, struct netlink_ext_ack *extack) { - struct net_device *orig_dev = dev; struct netdev_rx_queue *rxq; int ret; if (!netdev_need_ops_lock(dev)) return 
-EOPNOTSUPP; + if (rxq_idx >= dev->real_num_rx_queues) { NL_SET_ERR_MSG(extack, "rx queue index out of range"); return -ERANGE; } - rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues); - rxq = netif_get_rx_queue_lease_locked(&dev, &rxq_idx); - if (!rxq) { - NL_SET_ERR_MSG(extack, "rx queue peered to a virtual netdev"); - return -EBUSY; - } - if (!dev->dev.parent) { - NL_SET_ERR_MSG(extack, "rx queue is mapped to a virtual netdev"); - ret = -EBUSY; - goto out; - } + if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); - ret = -EINVAL; - goto out; + return -EINVAL; } if (dev->cfg->hds_thresh) { NL_SET_ERR_MSG(extack, "hds-thresh is not zero"); - ret = -EINVAL; - goto out; + return -EINVAL; } if (dev_xdp_prog_count(dev)) { NL_SET_ERR_MSG(extack, "unable to custom memory provider to device with XDP program attached"); - ret = -EEXIST; - goto out; + return -EEXIST; } + + rxq = __netif_get_rx_queue(dev, rxq_idx); if (rxq->mp_params.mp_ops) { NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); - ret = -EEXIST; - goto out; + return -EEXIST; } #ifdef CONFIG_XDP_SOCKETS if (rxq->pool) { NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP"); - ret = -EBUSY; - goto out; + return -EBUSY; } #endif + rxq->mp_params = *p; ret = netdev_rx_queue_restart(dev, rxq_idx); if (ret) { rxq->mp_params.mp_ops = NULL; rxq->mp_params.mp_priv = NULL; } -out: - netif_put_rx_queue_lease_locked(orig_dev, dev); return ret; } @@ -277,43 +157,38 @@ int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, return ret; } -void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, +void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, const struct pp_memory_provider_params *old_p) { - struct net_device *orig_dev = dev; struct netdev_rx_queue *rxq; int err; - if (WARN_ON_ONCE(rxq_idx >= dev->real_num_rx_queues)) + if (WARN_ON_ONCE(ifq_idx >= 
dev->real_num_rx_queues)) return; - rxq = netif_get_rx_queue_lease_locked(&dev, &rxq_idx); - if (WARN_ON_ONCE(!rxq)) - return; + rxq = __netif_get_rx_queue(dev, ifq_idx); /* Callers holding a netdev ref may get here after we already * went thru shutdown via dev_memory_provider_uninstall(). */ if (dev->reg_state > NETREG_REGISTERED && !rxq->mp_params.mp_ops) - goto out; + return; if (WARN_ON_ONCE(rxq->mp_params.mp_ops != old_p->mp_ops || rxq->mp_params.mp_priv != old_p->mp_priv)) - goto out; + return; rxq->mp_params.mp_ops = NULL; rxq->mp_params.mp_priv = NULL; - err = netdev_rx_queue_restart(dev, rxq_idx); + err = netdev_rx_queue_restart(dev, ifq_idx); WARN_ON(err && err != -ENETDOWN); -out: - netif_put_rx_queue_lease_locked(orig_dev, dev); } -void net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, +void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, struct pp_memory_provider_params *old_p) { netdev_lock(dev); - __net_mp_close_rxq(dev, rxq_idx, old_p); + __net_mp_close_rxq(dev, ifq_idx, old_p); netdev_unlock(dev); } diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index 797d2a08c5153..ca4f80282448b 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only -#include +#include #include "netlink.h" #include "common.h" @@ -169,16 +169,14 @@ ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info) if (ret) return ret; - /* ensure channels are not busy at the moment */ + /* Disabling channels, query zero-copy AF_XDP sockets */ from_channel = channels.combined_count + min(channels.rx_count, channels.tx_count); - for (i = from_channel; i < old_total; i++) { - if (netdev_queue_busy(dev, i, NULL)) { - GENL_SET_ERR_MSG(info, - "requested channel counts are too low due to busy queues (AF_XDP or queue leasing)"); + for (i = from_channel; i < old_total; i++) + if (xsk_get_pool_from_qid(dev, i)) { + GENL_SET_ERR_MSG(info, "requested channel counts are too low for 
existing zerocopy AF_XDP sockets"); return -EINVAL; } - } ret = dev->ethtool_ops->set_channels(dev, &channels); return ret < 0 ? ret : 1; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 02a3454234d6b..9431e305b2333 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -27,13 +27,12 @@ #include #include #include -#include #include #include +#include #include #include -#include - +#include #include "common.h" /* State held across locks and calls for commands which have devlink fallback */ @@ -2283,12 +2282,12 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, if (ret) return ret; - /* Disabling channels, query busy queues (AF_XDP, queue leasing) */ + /* Disabling channels, query zero-copy AF_XDP sockets */ from_channel = channels.combined_count + min(channels.rx_count, channels.tx_count); to_channel = curr.combined_count + max(curr.rx_count, curr.tx_count); for (i = from_channel; i < to_channel; i++) - if (netdev_queue_busy(dev, i, NULL)) + if (xsk_get_pool_from_qid(dev, i)) return -EINVAL; ret = dev->ethtool_ops->set_channels(dev, &channels); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 92f7914337251..3b46bc635c432 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -23,8 +23,6 @@ #include #include #include - -#include #include #include #include @@ -105,7 +103,7 @@ bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool) } EXPORT_SYMBOL(xsk_uses_need_wakeup); -struct xsk_buff_pool *xsk_get_pool_from_qid(const struct net_device *dev, +struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev, u16 queue_id) { if (queue_id < dev->real_num_rx_queues) @@ -119,18 +117,10 @@ EXPORT_SYMBOL(xsk_get_pool_from_qid); void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id) { - struct net_device *orig_dev = dev; - unsigned int id = queue_id; - - if (id < dev->real_num_rx_queues) - WARN_ON_ONCE(!netif_get_rx_queue_lease_locked(&dev, &id)); - - if (id < dev->real_num_rx_queues) - dev->_rx[id].pool = NULL; - if (id < 
dev->real_num_tx_queues) - dev->_tx[id].pool = NULL; - - netif_put_rx_queue_lease_locked(orig_dev, dev); + if (queue_id < dev->num_rx_queues) + dev->_rx[queue_id].pool = NULL; + if (queue_id < dev->num_tx_queues) + dev->_tx[queue_id].pool = NULL; } /* The buffer pool is stored both in the _rx struct and the _tx struct as we do @@ -140,29 +130,17 @@ void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id) int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool, u16 queue_id) { - struct net_device *orig_dev = dev; - unsigned int id = queue_id; - int ret = 0; - - if (id >= max(dev->real_num_rx_queues, - dev->real_num_tx_queues)) + if (queue_id >= max_t(unsigned int, + dev->real_num_rx_queues, + dev->real_num_tx_queues)) return -EINVAL; - if (id < dev->real_num_rx_queues) { - if (!netif_get_rx_queue_lease_locked(&dev, &id)) - return -EBUSY; - if (xsk_get_pool_from_qid(dev, id)) { - ret = -EBUSY; - goto out; - } - } - if (id < dev->real_num_rx_queues) - dev->_rx[id].pool = pool; - if (id < dev->real_num_tx_queues) - dev->_tx[id].pool = pool; -out: - netif_put_rx_queue_lease_locked(orig_dev, dev); - return ret; + if (queue_id < dev->real_num_rx_queues) + dev->_rx[queue_id].pool = pool; + if (queue_id < dev->real_num_tx_queues) + dev->_tx[queue_id].pool = pool; + + return 0; } static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len, @@ -346,37 +324,14 @@ static bool xsk_is_bound(struct xdp_sock *xs) return false; } -static bool xsk_dev_queue_valid(const struct xdp_sock *xs, - const struct xdp_rxq_info *info) -{ - struct net_device *dev = xs->dev; - u32 queue_index = xs->queue_id; - struct netdev_rx_queue *rxq; - - if (info->dev == dev && - info->queue_index == queue_index) - return true; - - if (queue_index < dev->real_num_rx_queues) { - rxq = READ_ONCE(__netif_get_rx_queue(dev, queue_index)->lease); - if (!rxq) - return false; - - dev = rxq->dev; - queue_index = get_netdev_rx_queue_index(rxq); - - return info->dev == dev 
&& - info->queue_index == queue_index; - } - return false; -} - static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { if (!xsk_is_bound(xs)) return -ENXIO; - if (!xsk_dev_queue_valid(xs, xdp->rxq)) + + if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) return -EINVAL; + if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) { xs->rx_dropped++; return -ENOSPC; diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 7df1056a35fda..e0b579a1df4f2 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -160,7 +160,6 @@ enum { NETDEV_A_QUEUE_DMABUF, NETDEV_A_QUEUE_IO_URING, NETDEV_A_QUEUE_XSK, - NETDEV_A_QUEUE_LEASE, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) @@ -203,15 +202,6 @@ enum { NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) }; -enum { - NETDEV_A_LEASE_IFINDEX = 1, - NETDEV_A_LEASE_QUEUE, - NETDEV_A_LEASE_NETNS_ID, - - __NETDEV_A_LEASE_MAX, - NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1) -}; - enum { NETDEV_A_DMABUF_IFINDEX = 1, NETDEV_A_DMABUF_QUEUES, @@ -238,7 +228,6 @@ enum { NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, NETDEV_CMD_BIND_TX, - NETDEV_CMD_QUEUE_CREATE, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst index b94e81c2e0304..eb838ae948441 100644 --- a/tools/testing/selftests/drivers/net/README.rst +++ b/tools/testing/selftests/drivers/net/README.rst @@ -62,13 +62,6 @@ LOCAL_V4, LOCAL_V6, REMOTE_V4, REMOTE_V6 Local and remote endpoint IP addresses. -LOCAL_PREFIX_V4, LOCAL_PREFIX_V6 -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Local IP prefix/subnet which can be used to allocate extra IP addresses (for -network name spaces behind macvlan, veth, netkit devices). DUT must be -reachable using these addresses from the endpoint. 
- REMOTE_TYPE ~~~~~~~~~~~ diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 39ad86d693b3d..9c163ba6feee5 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -32,8 +32,6 @@ TEST_PROGS = \ irq.py \ loopback.sh \ nic_timestamp.py \ - nk_netns.py \ - nk_qlease.py \ pp_alloc_fail.py \ rss_api.py \ rss_ctx.py \ diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index 0220082493136..d5d247eca6b7c 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -3,7 +3,6 @@ """ Driver test environment (hardware-only tests). NetDrvEnv and NetDrvEpEnv are the main environment classes. -NetDrvContEnv extends NetDrvEpEnv with netkit container support. Former is for local host only tests, latter creates / connects to a remote endpoint. See NIPA wiki for more information about running and writing driver tests. 
@@ -30,7 +29,7 @@ try: from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none from drivers.net.lib.py import GenerateTraffic, Remote, Iperf3Runner - from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv, NetDrvContEnv + from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev", "EthtoolFamily", "NetdevFamily", "NetshaperFamily", @@ -45,8 +44,8 @@ try: "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt", "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", "ksft_not_none", "ksft_not_none", - "NetDrvEnv", "NetDrvEpEnv", "NetDrvContEnv", "GenerateTraffic", - "Remote", "Iperf3Runner"] + "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote", + "Iperf3Runner"] except ModuleNotFoundError as e: print("Failed importing `net` library from kernel sources") print(str(e)) diff --git a/tools/testing/selftests/drivers/net/hw/nk_forward.bpf.c b/tools/testing/selftests/drivers/net/hw/nk_forward.bpf.c deleted file mode 100644 index 86ebfc1445b62..0000000000000 --- a/tools/testing/selftests/drivers/net/hw/nk_forward.bpf.c +++ /dev/null @@ -1,49 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include - -#define TC_ACT_OK 0 -#define ETH_P_IPV6 0x86DD - -#define ctx_ptr(field) ((void *)(long)(field)) - -#define v6_p64_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \ - a.s6_addr32[1] == b.s6_addr32[1]) - -volatile __u32 netkit_ifindex; -volatile __u8 ipv6_prefix[16]; - -SEC("tc/ingress") -int tc_redirect_peer(struct __sk_buff *skb) -{ - void *data_end = ctx_ptr(skb->data_end); - void *data = ctx_ptr(skb->data); - struct in6_addr *peer_addr; - struct ipv6hdr *ip6h; - struct ethhdr *eth; - - peer_addr = (struct in6_addr *)ipv6_prefix; - - if (skb->protocol != bpf_htons(ETH_P_IPV6)) - return TC_ACT_OK; - - eth = data; - if ((void *)(eth + 1) > data_end) - return TC_ACT_OK; - - ip6h = data 
+ sizeof(struct ethhdr); - if ((void *)(ip6h + 1) > data_end) - return TC_ACT_OK; - - if (!v6_p64_equal(ip6h->daddr, (*peer_addr))) - return TC_ACT_OK; - - return bpf_redirect_peer(netkit_ifindex, 0); -} - -char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/drivers/net/hw/nk_netns.py b/tools/testing/selftests/drivers/net/hw/nk_netns.py deleted file mode 100755 index afa8638195d85..0000000000000 --- a/tools/testing/selftests/drivers/net/hw/nk_netns.py +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 - -from lib.py import ksft_run, ksft_exit -from lib.py import NetDrvContEnv -from lib.py import cmd - - -def test_ping(cfg) -> None: - cfg.require_ipver("6") - - cmd(f"ping -c 1 -W5 {cfg.nk_guest_ipv6}", host=cfg.remote) - cmd(f"ping -c 1 -W5 {cfg.remote_addr_v['6']}", ns=cfg.netns) - - -def main() -> None: - with NetDrvContEnv(__file__) as cfg: - ksft_run([test_ping], args=(cfg,)) - ksft_exit() - - -if __name__ == "__main__": - main() diff --git a/tools/testing/selftests/drivers/net/hw/nk_qlease.py b/tools/testing/selftests/drivers/net/hw/nk_qlease.py deleted file mode 100755 index 738a46d2d20cb..0000000000000 --- a/tools/testing/selftests/drivers/net/hw/nk_qlease.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 - -import re -from os import path -from lib.py import ksft_run, ksft_exit -from lib.py import NetDrvContEnv -from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen - - -def create_rss_ctx(cfg): - output = ethtool(f"-X {cfg.ifname} context new start {cfg.src_queue} equal 1").stdout - values = re.search(r'New RSS context is (\d+)', output).group(1) - return int(values) - - -def set_flow_rule(cfg): - output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {cfg.port} action {cfg.src_queue}").stdout - values = re.search(r'ID (\d+)', output).group(1) - return int(values) - - -def set_flow_rule_rss(cfg, rss_ctx_id): - output = 
ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {cfg.port} context {rss_ctx_id}").stdout - values = re.search(r'ID (\d+)', output).group(1) - return int(values) - - -def test_iou_zcrx(cfg) -> None: - cfg.require_ipver('6') - - ethtool(f"-X {cfg.ifname} equal {cfg.src_queue}") - defer(ethtool, f"-X {cfg.ifname} default") - - flow_rule_id = set_flow_rule(cfg) - defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") - - rx_cmd = f"ip netns exec {cfg.netns.name} {cfg.bin_local} -s -p {cfg.port} -i {cfg._nk_guest_ifname} -q {cfg.nk_queue}" - tx_cmd = f"{cfg.bin_remote} -c -h {cfg.nk_guest_ipv6} -p {cfg.port} -l 12840" - with bkg(rx_cmd, exit_wait=True): - wait_port_listen(cfg.port, proto="tcp", ns=cfg.netns) - cmd(tx_cmd, host=cfg.remote) - - -def main() -> None: - with NetDrvContEnv(__file__, lease=True) as cfg: - cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx") - cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) - cfg.port = rand_port() - ksft_run([test_iou_zcrx], args=(cfg,)) - ksft_exit() - - -if __name__ == "__main__": - main() diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index be3a8a9368823..8b75faa9af6d0 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -3,7 +3,6 @@ """ Driver test environment. NetDrvEnv and NetDrvEpEnv are the main environment classes. -NetDrvContEnv extends NetDrvEpEnv with netkit container support. Former is for local host only tests, latter creates / connects to a remote endpoint. See NIPA wiki for more information about running and writing driver tests. 
@@ -44,12 +43,12 @@ try: "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", "ksft_not_none", "ksft_not_none"] - from .env import NetDrvEnv, NetDrvEpEnv, NetDrvContEnv + from .env import NetDrvEnv, NetDrvEpEnv from .load import GenerateTraffic, Iperf3Runner from .remote import Remote - __all__ += ["NetDrvEnv", "NetDrvEpEnv", "NetDrvContEnv", "GenerateTraffic", - "Remote", "Iperf3Runner"] + __all__ += ["NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote", + "Iperf3Runner"] except ModuleNotFoundError as e: print("Failed importing `net` library from kernel sources") print(str(e)) diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 7066d78395c67..41cc248ac8482 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -1,17 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 -import ipaddress import os -import re import time from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx from lib.py import ksft_setup, wait_file from lib.py import cmd, ethtool, ip, CmdExitFailure from lib.py import NetNS, NetdevSimDev -from lib.py import NetdevFamily, EthtoolFamily from .remote import Remote -from . import bpftool class NetDrvEnvBase: @@ -293,156 +289,3 @@ class NetDrvEpEnv(NetDrvEnvBase): data.get('stats-block-usecs', 0) / 1000 / 1000 time.sleep(self._stats_settle_time) - - -class NetDrvContEnv(NetDrvEpEnv): - """ - Class for an environment with a netkit pair setup for forwarding traffic - between the physical interface and a network namespace. 
- """ - - def __init__(self, src_path, lease=False, **kwargs): - super().__init__(src_path, **kwargs) - - self.require_ipver("6") - local_prefix = self.env.get("LOCAL_PREFIX_V6") - if not local_prefix: - raise KsftSkipEx("LOCAL_PREFIX_V6 required") - - self.netdevnl = NetdevFamily() - self.ethnl = EthtoolFamily() - - local_prefix = local_prefix.rstrip("/64").rstrip("::").rstrip(":") - self.ipv6_prefix = f"{local_prefix}::" - self.nk_host_ipv6 = f"{local_prefix}::2:1" - self.nk_guest_ipv6 = f"{local_prefix}::2:2" - - self.netns = None - self._nk_host_ifname = None - self._nk_guest_ifname = None - self._tc_attached = False - self._bpf_prog_pref = None - self._bpf_prog_id = None - self._leased = False - - nk_rxqueues = 1 - if lease: - nk_rxqueues = 2 - ip(f"link add type netkit mode l2 forward peer forward numrxqueues {nk_rxqueues}") - - all_links = ip("-d link show", json=True) - netkit_links = [link for link in all_links - if link.get('linkinfo', {}).get('info_kind') == 'netkit' - and 'UP' not in link.get('flags', [])] - - if len(netkit_links) != 2: - raise KsftSkipEx("Failed to create netkit pair") - - netkit_links.sort(key=lambda x: x['ifindex']) - self._nk_host_ifname = netkit_links[1]['ifname'] - self._nk_guest_ifname = netkit_links[0]['ifname'] - self.nk_host_ifindex = netkit_links[1]['ifindex'] - self.nk_guest_ifindex = netkit_links[0]['ifindex'] - - if lease: - self._lease_queues() - - self._setup_ns() - self._attach_bpf() - - def __del__(self): - if self._tc_attached: - cmd(f"tc filter del dev {self.ifname} ingress pref {self._bpf_prog_pref}") - self._tc_attached = False - - if self._nk_host_ifname: - cmd(f"ip link del dev {self._nk_host_ifname}") - self._nk_host_ifname = None - self._nk_guest_ifname = None - - if self.netns: - del self.netns - self.netns = None - - if self._leased: - self.ethnl.rings_set({'header': {'dev-index': self.ifindex}, - 'tcp-data-split': 'unknown', - 'hds-thresh': self._hds_thresh, - 'rx': self._rx_rings}) - self._leased = False - 
- super().__del__() - - def _lease_queues(self): - channels = self.ethnl.channels_get({'header': {'dev-index': self.ifindex}}) - channels = channels['combined-count'] - if channels < 2: - raise KsftSkipEx('Test requires NETIF with at least 2 combined channels') - - rings = self.ethnl.rings_get({'header': {'dev-index': self.ifindex}}) - self._rx_rings = rings['rx'] - self._hds_thresh = rings.get('hds-thresh', 0) - self.ethnl.rings_set({'header': {'dev-index': self.ifindex}, - 'tcp-data-split': 'enabled', - 'hds-thresh': 0, - 'rx': 64}) - self.src_queue = channels - 1 - bind_result = self.netdevnl.queue_create( - { - "ifindex": self.nk_guest_ifindex, - "type": "rx", - "lease": { - "ifindex": self.ifindex, - "queue": {"id": self.src_queue, "type": "rx"}, - }, - } - ) - self.nk_queue = bind_result['id'] - self._leased = True - - def _setup_ns(self): - self.netns = NetNS() - ip(f"link set dev {self._nk_guest_ifname} netns {self.netns.name}") - ip(f"link set dev {self._nk_host_ifname} up") - ip(f"-6 addr add fe80::1/64 dev {self._nk_host_ifname} nodad") - ip(f"-6 route add {self.nk_guest_ipv6}/128 via fe80::2 dev {self._nk_host_ifname}") - - ip("link set lo up", ns=self.netns) - ip(f"link set dev {self._nk_guest_ifname} up", ns=self.netns) - ip(f"-6 addr add fe80::2/64 dev {self._nk_guest_ifname}", ns=self.netns) - ip(f"-6 addr add {self.nk_guest_ipv6}/64 dev {self._nk_guest_ifname} nodad", ns=self.netns) - ip(f"-6 route add default via fe80::1 dev {self._nk_guest_ifname}", ns=self.netns) - - def _attach_bpf(self): - bpf_obj = self.test_dir / "nk_forward.bpf.o" - if not bpf_obj.exists(): - raise KsftSkipEx("BPF prog not found") - - cmd(f"tc filter add dev {self.ifname} ingress bpf obj {bpf_obj} sec tc/ingress direct-action") - self._tc_attached = True - - tc_info = cmd(f"tc filter show dev {self.ifname} ingress").stdout - match = re.search(r'pref (\d+).*nk_forward\.bpf.*id (\d+)', tc_info) - if not match: - raise Exception("Failed to get BPF prog ID") - 
self._bpf_prog_pref = int(match.group(1)) - self._bpf_prog_id = int(match.group(2)) - - prog_info = bpftool(f"prog show id {self._bpf_prog_id}", json=True) - map_ids = prog_info.get("map_ids", []) - - bss_map_id = None - for map_id in map_ids: - map_info = bpftool(f"map show id {map_id}", json=True) - if map_info.get("name").endswith("bss"): - bss_map_id = map_id - - if bss_map_id is None: - raise Exception("Failed to find .bss map") - - ipv6_addr = ipaddress.IPv6Address(self.ipv6_prefix) - ipv6_bytes = ipv6_addr.packed - ifindex_bytes = self.nk_host_ifindex.to_bytes(4, byteorder='little') - value = ipv6_bytes + ifindex_bytes - value_hex = ' '.join(f'{b:02x}' for b in value) - bpftool(f"map update id {bss_map_id} key hex 00 00 00 00 value hex {value_hex}") -- 2.47.3