doc: XSK information for this queue, if any.
type: nest
nested-attributes: xsk-info
- -
- name: lease
- doc: |
- A queue on a virtual device can carry a lease that refers to a queue
- on a physical device. This is useful for memory providers and AF_XDP
- operations, which take an ifindex and queue id, since it lets
- applications bind against virtual devices in containers.
- type: nest
- nested-attributes: lease
-
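For context, the lease documented above was surfaced to userspace through queue-get; a minimal sketch using the selftests' ynl wrapper (ifindex and queue ids are illustrative, and the reply layout is inferred from the spec, not from a real run):

    from lib.py import NetdevFamily

    netdevnl = NetdevFamily()
    q = netdevnl.queue_get({"ifindex": 5, "id": 1, "type": "rx"})
    # A leased queue reported its physical backing roughly as:
    #   {'id': 1, 'type': 'rx', 'ifindex': 5,
    #    'lease': {'ifindex': 2, 'queue': {'id': 15, 'type': 'rx'}}}
    print(q.get("lease"))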
name: qstats
doc: |
name: id
-
name: type
- -
- name: lease
- attributes:
- -
- name: ifindex
- doc: The netdev ifindex to lease the queue from.
- type: u32
- checks:
- min: 1
- -
- name: queue
- doc: The netdev queue to lease from.
- type: nest
- nested-attributes: queue-id
- -
- name: netns-id
- doc: The network namespace id of the netdev.
- type: s32
-
name: dmabuf
attributes:
- dmabuf
- io-uring
- xsk
- - lease
dump:
request:
attributes:
reply:
attributes:
- id
- -
- name: queue-create
- doc: |
- Create a new queue for the given netdevice. Whether this operation
- is supported depends on the device and the driver.
- attribute-set: queue
- flags: [admin-perm]
- do:
- request:
- attributes:
- - ifindex
- - type
- - lease
- reply: &queue-create-op
- attributes:
- - id
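For reference, the removed selftests drove this op via the ynl wrapper; a minimal usage sketch (assuming the selftests' NetdevFamily wrapper is importable; device ifindexes and the leased queue id are illustrative):

    from lib.py import NetdevFamily

    netdevnl = NetdevFamily()
    reply = netdevnl.queue_create({
        "ifindex": 5,      # virtual device (e.g. netkit) that grows a queue
        "type": "rx",
        "lease": {
            "ifindex": 2,  # physical device to lease the queue from
            "queue": {"id": 15, "type": "rx"},
        },
    })
    new_rxq_id = reply["id"]  # id of the newly created virtual RX queue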
kernel-family:
headers: ["net/netdev_netlink.h"]
#include <linux/bpf_mprog.h>
#include <linux/indirect_call_wrapper.h>
-#include <net/netdev_lock.h>
-#include <net/netdev_queues.h>
-#include <net/netdev_rx_queue.h>
-#include <net/xdp_sock_drv.h>
#include <net/netkit.h>
#include <net/dst.h>
#include <net/tcx.h>
-#define NETKIT_DRV_NAME "netkit"
-
-#define NETKIT_NUM_RX_QUEUES_MAX 1024
-#define NETKIT_NUM_TX_QUEUES_MAX 1
-
-#define NETKIT_NUM_RX_QUEUES_REAL 1
-#define NETKIT_NUM_TX_QUEUES_REAL 1
+#define DRV_NAME "netkit"
struct netkit {
__cacheline_group_begin(netkit_fastpath);
__cacheline_group_begin(netkit_slowpath);
enum netkit_mode mode;
- enum netkit_pairing pair;
bool primary;
u32 headroom;
__cacheline_group_end(netkit_slowpath);
struct net_device *dev;
};
-static struct rtnl_link_ops netkit_link_ops;
-
static __always_inline int
netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
enum netkit_action ret)
struct netkit *nk = netkit_priv(dev);
struct net_device *peer = rtnl_dereference(nk->peer);
- if (nk->pair == NETKIT_DEVICE_SINGLE) {
- netif_carrier_on(dev);
- return 0;
- }
if (!peer)
return -ENOTCONN;
if (peer->flags & IFF_UP) {
stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
}
-static bool netkit_xsk_supported_at_phys(const struct net_device *dev)
-{
- if (!dev->netdev_ops->ndo_bpf ||
- !dev->netdev_ops->ndo_xdp_xmit ||
- !dev->netdev_ops->ndo_xsk_wakeup)
- return false;
- if ((dev->xdp_features & NETDEV_XDP_ACT_XSK) != NETDEV_XDP_ACT_XSK)
- return false;
- return true;
-}
-
-static int netkit_xsk(struct net_device *dev, struct netdev_bpf *xdp)
-{
- struct netkit *nk = netkit_priv(dev);
- struct netdev_bpf xdp_lower;
- struct netdev_rx_queue *rxq;
- struct net_device *phys;
- int ret = -EBUSY;
-
- switch (xdp->command) {
- case XDP_SETUP_XSK_POOL:
- if (nk->pair == NETKIT_DEVICE_PAIR)
- return -EOPNOTSUPP;
- if (xdp->xsk.queue_id >= dev->real_num_rx_queues)
- return -EINVAL;
-
- rxq = __netif_get_rx_queue(dev, xdp->xsk.queue_id);
- if (!rxq->lease)
- return -EOPNOTSUPP;
-
- phys = rxq->lease->dev;
- if (!netkit_xsk_supported_at_phys(phys))
- return -EOPNOTSUPP;
-
- memcpy(&xdp_lower, xdp, sizeof(xdp_lower));
- xdp_lower.xsk.queue_id = get_netdev_rx_queue_index(rxq->lease);
- break;
- case XDP_SETUP_PROG:
- return -EPERM;
- default:
- return -EINVAL;
- }
-
- netdev_lock(phys);
- if (!dev_get_min_mp_channel_count(phys))
- ret = phys->netdev_ops->ndo_bpf(phys, &xdp_lower);
- netdev_unlock(phys);
- return ret;
-}
-
-static int netkit_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
-{
- struct netdev_rx_queue *rxq;
- struct net_device *phys;
-
- if (queue_id >= dev->real_num_rx_queues)
- return -EINVAL;
-
- rxq = __netif_get_rx_queue(dev, queue_id);
- if (!rxq->lease)
- return -EOPNOTSUPP;
-
- phys = rxq->lease->dev;
- if (!netkit_xsk_supported_at_phys(phys))
- return -EOPNOTSUPP;
-
- return phys->netdev_ops->ndo_xsk_wakeup(phys,
- get_netdev_rx_queue_index(rxq->lease), flags);
-}
-
-static int netkit_init(struct net_device *dev)
-{
- netdev_lockdep_set_classes(dev);
- return 0;
-}
-
static void netkit_uninit(struct net_device *dev);
static const struct net_device_ops netkit_netdev_ops = {
- .ndo_init = netkit_init,
.ndo_open = netkit_open,
.ndo_stop = netkit_close,
.ndo_start_xmit = netkit_xmit,
.ndo_get_peer_dev = netkit_peer_dev,
.ndo_get_stats64 = netkit_get_stats,
.ndo_uninit = netkit_uninit,
- .ndo_bpf = netkit_xsk,
- .ndo_xsk_wakeup = netkit_xsk_wakeup,
.ndo_features_check = passthru_features_check,
};
static void netkit_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
{
- strscpy(info->driver, NETKIT_DRV_NAME, sizeof(info->driver));
+ strscpy(info->driver, DRV_NAME, sizeof(info->driver));
}
static const struct ethtool_ops netkit_ethtool_ops = {
.get_drvinfo = netkit_get_drvinfo,
};
-static int netkit_queue_create(struct net_device *dev)
-{
- struct netkit *nk = netkit_priv(dev);
- u32 rxq_count_old, rxq_count_new;
- int err;
-
- rxq_count_old = dev->real_num_rx_queues;
- rxq_count_new = rxq_count_old + 1;
-
- /* Only allow leasing a queue in single device mode, or leasing
- * against the peer device which then ends up in the target netns.
- */
- if (nk->pair == NETKIT_DEVICE_PAIR && nk->primary)
- return -EOPNOTSUPP;
-
- if (netif_running(dev))
- netif_carrier_off(dev);
- err = netif_set_real_num_rx_queues(dev, rxq_count_new);
- if (netif_running(dev))
- netif_carrier_on(dev);
-
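- /* The old real rxq count equals the id of the newly added queue. */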
- return err ? : rxq_count_old;
-}
-
-static const struct netdev_queue_mgmt_ops netkit_queue_mgmt_ops = {
- .ndo_queue_create = netkit_queue_create,
-};
-
-static struct net_device *netkit_alloc(struct nlattr *tb[],
- const char *ifname,
- unsigned char name_assign_type,
- unsigned int num_tx_queues,
- unsigned int num_rx_queues)
-{
- const struct rtnl_link_ops *ops = &netkit_link_ops;
- struct net_device *dev;
-
- if (num_tx_queues > NETKIT_NUM_TX_QUEUES_MAX ||
- num_rx_queues > NETKIT_NUM_RX_QUEUES_MAX)
- return ERR_PTR(-EOPNOTSUPP);
-
- dev = alloc_netdev_mqs(ops->priv_size, ifname,
- name_assign_type, ops->setup,
- num_tx_queues, num_rx_queues);
- if (dev) {
- dev->real_num_tx_queues = NETKIT_NUM_TX_QUEUES_REAL;
- dev->real_num_rx_queues = NETKIT_NUM_RX_QUEUES_REAL;
- }
- return dev;
-}
-
-static void netkit_queue_unlease(struct net_device *dev)
-{
- struct netdev_rx_queue *rxq, *rxq_lease;
- struct net_device *dev_lease;
- int i;
-
- if (dev->real_num_rx_queues == 1)
- return;
-
- netdev_lock(dev);
- for (i = 1; i < dev->real_num_rx_queues; i++) {
- rxq = __netif_get_rx_queue(dev, i);
- rxq_lease = rxq->lease;
- dev_lease = rxq_lease->dev;
-
- netdev_lock(dev_lease);
- netdev_rx_queue_unlease(rxq, rxq_lease);
- netdev_unlock(dev_lease);
- }
- netdev_unlock(dev);
-}
-
static void netkit_setup(struct net_device *dev)
{
static const netdev_features_t netkit_features_hw_vlan =
dev->priv_flags |= IFF_DISABLE_NETPOLL;
dev->lltx = true;
- dev->netdev_ops = &netkit_netdev_ops;
- dev->ethtool_ops = &netkit_ethtool_ops;
- dev->queue_mgmt_ops = &netkit_queue_mgmt_ops;
+ dev->ethtool_ops = &netkit_ethtool_ops;
+ dev->netdev_ops = &netkit_netdev_ops;
dev->features |= netkit_features;
dev->hw_features = netkit_features;
dev->hw_enc_features = netkit_features;
dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
dev->vlan_features = dev->features & ~netkit_features_hw_vlan;
+
dev->needs_free_netdev = true;
netif_set_tso_max_size(dev, GSO_MAX_SIZE);
-
- xdp_set_features_flag(dev, NETDEV_XDP_ACT_XSK);
}
static struct net *netkit_get_link_net(const struct net_device *dev)
return 0;
}
+static struct rtnl_link_ops netkit_link_ops;
+
static int netkit_new_link(struct net_device *dev,
struct rtnl_newlink_params *params,
struct netlink_ext_ack *extack)
enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT;
enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT;
struct nlattr *peer_tb[IFLA_MAX + 1], **tbp, *attr;
- enum netkit_pairing pair = NETKIT_DEVICE_PAIR;
enum netkit_action policy_prim = NETKIT_PASS;
enum netkit_action policy_peer = NETKIT_PASS;
struct nlattr **data = params->data;
struct nlattr **tb = params->tb;
u16 headroom = 0, tailroom = 0;
struct ifinfomsg *ifmp = NULL;
- struct net_device *peer = NULL;
- bool seen_peer = false;
+ struct net_device *peer;
char ifname[IFNAMSIZ];
struct netkit *nk;
int err;
headroom = nla_get_u16(data[IFLA_NETKIT_HEADROOM]);
if (data[IFLA_NETKIT_TAILROOM])
tailroom = nla_get_u16(data[IFLA_NETKIT_TAILROOM]);
- if (data[IFLA_NETKIT_PAIRING])
- pair = nla_get_u32(data[IFLA_NETKIT_PAIRING]);
-
- seen_peer = data[IFLA_NETKIT_PEER_INFO] ||
- data[IFLA_NETKIT_PEER_SCRUB] ||
- data[IFLA_NETKIT_PEER_POLICY];
}
if (ifmp && tbp[IFLA_IFNAME]) {
if (mode != NETKIT_L2 &&
(tb[IFLA_ADDRESS] || tbp[IFLA_ADDRESS]))
return -EOPNOTSUPP;
- if (pair == NETKIT_DEVICE_SINGLE &&
- (tb != tbp || seen_peer || policy_prim != NETKIT_PASS))
- return -EOPNOTSUPP;
- if (pair == NETKIT_DEVICE_PAIR) {
- peer = rtnl_create_link(peer_net, ifname, ifname_assign_type,
- &netkit_link_ops, tbp, extack);
- if (IS_ERR(peer))
- return PTR_ERR(peer);
-
- netif_inherit_tso_max(peer, dev);
- if (headroom)
- peer->needed_headroom = headroom;
- if (tailroom)
- peer->needed_tailroom = tailroom;
- if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS]))
- eth_hw_addr_random(peer);
- if (ifmp && dev->ifindex)
- peer->ifindex = ifmp->ifi_index;
+ peer = rtnl_create_link(peer_net, ifname, ifname_assign_type,
+ &netkit_link_ops, tbp, extack);
+ if (IS_ERR(peer))
+ return PTR_ERR(peer);
- nk = netkit_priv(peer);
- nk->primary = false;
- nk->policy = policy_peer;
- nk->scrub = scrub_peer;
- nk->mode = mode;
- nk->pair = pair;
- nk->headroom = headroom;
- bpf_mprog_bundle_init(&nk->bundle);
-
- err = register_netdevice(peer);
- if (err < 0)
- goto err_register_peer;
- netif_carrier_off(peer);
- if (mode == NETKIT_L2)
- dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL);
-
- err = rtnl_configure_link(peer, NULL, 0, NULL);
- if (err < 0)
- goto err_configure_peer;
+ netif_inherit_tso_max(peer, dev);
+ if (headroom) {
+ peer->needed_headroom = headroom;
+ dev->needed_headroom = headroom;
}
+ if (tailroom) {
+ peer->needed_tailroom = tailroom;
+ dev->needed_tailroom = tailroom;
+ }
+
+ if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS]))
+ eth_hw_addr_random(peer);
+ if (ifmp && dev->ifindex)
+ peer->ifindex = ifmp->ifi_index;
+
+ nk = netkit_priv(peer);
+ nk->primary = false;
+ nk->policy = policy_peer;
+ nk->scrub = scrub_peer;
+ nk->mode = mode;
+ nk->headroom = headroom;
+ bpf_mprog_bundle_init(&nk->bundle);
+
+ err = register_netdevice(peer);
+ if (err < 0)
+ goto err_register_peer;
+ netif_carrier_off(peer);
+ if (mode == NETKIT_L2)
+ dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL);
+
+ err = rtnl_configure_link(peer, NULL, 0, NULL);
+ if (err < 0)
+ goto err_configure_peer;
if (mode == NETKIT_L2 && !tb[IFLA_ADDRESS])
eth_hw_addr_random(dev);
nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
else
strscpy(dev->name, "nk%d", IFNAMSIZ);
- if (headroom)
- dev->needed_headroom = headroom;
- if (tailroom)
- dev->needed_tailroom = tailroom;
nk = netkit_priv(dev);
nk->primary = true;
nk->policy = policy_prim;
nk->scrub = scrub_prim;
nk->mode = mode;
- nk->pair = pair;
nk->headroom = headroom;
bpf_mprog_bundle_init(&nk->bundle);
dev_change_flags(dev, dev->flags & ~IFF_NOARP, NULL);
rcu_assign_pointer(netkit_priv(dev)->peer, peer);
- if (peer)
- rcu_assign_pointer(netkit_priv(peer)->peer, dev);
+ rcu_assign_pointer(netkit_priv(peer)->peer, dev);
return 0;
err_configure_peer:
- if (peer)
- unregister_netdevice(peer);
+ unregister_netdevice(peer);
return err;
err_register_peer:
free_netdev(peer);
nk = netkit_priv(dev);
if (!nk->primary)
return ERR_PTR(-EACCES);
- if (nk->pair == NETKIT_DEVICE_SINGLE)
- return ERR_PTR(-EOPNOTSUPP);
if (which == BPF_NETKIT_PEER) {
dev = rcu_dereference_rtnl(nk->peer);
if (!dev)
static void netkit_uninit(struct net_device *dev)
{
netkit_release_all(dev);
- netkit_queue_unlease(dev);
}
static void netkit_del_link(struct net_device *dev, struct list_head *head)
{ IFLA_NETKIT_PEER_INFO, "peer info" },
{ IFLA_NETKIT_HEADROOM, "headroom" },
{ IFLA_NETKIT_TAILROOM, "tailroom" },
- { IFLA_NETKIT_PAIRING, "pairing" },
};
if (!nk->primary) {
}
if (data[IFLA_NETKIT_POLICY]) {
- err = -EOPNOTSUPP;
attr = data[IFLA_NETKIT_POLICY];
policy = nla_get_u32(attr);
- if (nk->pair == NETKIT_DEVICE_PAIR)
- err = netkit_check_policy(policy, attr, extack);
+ err = netkit_check_policy(policy, attr, extack);
if (err)
return err;
WRITE_ONCE(nk->policy, policy);
return 0;
}
-static void netkit_check_lease_unregister(struct net_device *dev)
-{
- LIST_HEAD(list_kill);
- u32 q_idx;
-
- if (READ_ONCE(dev->reg_state) != NETREG_UNREGISTERING ||
- !dev->dev.parent)
- return;
-
- netdev_lock_ops(dev);
- for (q_idx = 0; q_idx < dev->real_num_rx_queues; q_idx++) {
- struct net_device *tmp = dev;
- u32 tmp_q_idx = q_idx;
-
- if (netif_rx_queue_lease_get_owner(&tmp, &tmp_q_idx)) {
- if (tmp->netdev_ops != &netkit_netdev_ops)
- continue;
- /* A single phys device can have multiple queues leased to one
- * netkit device, but that netkit device may only be queued to
- * list_kill once. Queues of the phys device can also be leased
- * by different individual netkit devices, hence we batch the
- * removals via list_kill.
- */
- if (unregister_netdevice_queued(tmp))
- continue;
- netkit_del_link(tmp, &list_kill);
- }
- }
- netdev_unlock_ops(dev);
- unregister_netdevice_many(&list_kill);
-}
-
-static int netkit_notifier(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
- if (event == NETDEV_UNREGISTER)
- netkit_check_lease_unregister(dev);
- return NOTIFY_DONE;
-}
-
static size_t netkit_get_size(const struct net_device *dev)
{
return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */
nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */
nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_HEADROOM */
nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_TAILROOM */
- nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PAIRING */
0;
}
return -EMSGSIZE;
if (nla_put_u16(skb, IFLA_NETKIT_TAILROOM, dev->needed_tailroom))
return -EMSGSIZE;
- if (nla_put_u32(skb, IFLA_NETKIT_PAIRING, nk->pair))
- return -EMSGSIZE;
if (peer) {
nk = netkit_priv(peer);
[IFLA_NETKIT_TAILROOM] = { .type = NLA_U16 },
[IFLA_NETKIT_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
[IFLA_NETKIT_PEER_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
- [IFLA_NETKIT_PAIRING] = NLA_POLICY_MAX(NLA_U32, NETKIT_DEVICE_SINGLE),
[IFLA_NETKIT_PRIMARY] = { .type = NLA_REJECT,
.reject_message = "Primary attribute is read-only" },
};
static struct rtnl_link_ops netkit_link_ops = {
- .kind = NETKIT_DRV_NAME,
+ .kind = DRV_NAME,
.priv_size = sizeof(struct netkit),
- .alloc = netkit_alloc,
.setup = netkit_setup,
.newlink = netkit_new_link,
.dellink = netkit_del_link,
.maxtype = IFLA_NETKIT_MAX,
};
-static struct notifier_block netkit_netdev_notifier = {
- .notifier_call = netkit_notifier,
-};
-
-static __init int netkit_mod_init(void)
+static __init int netkit_init(void)
{
- int ret;
-
BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT ||
(int)NETKIT_PASS != (int)TCX_PASS ||
(int)NETKIT_DROP != (int)TCX_DROP ||
(int)NETKIT_REDIRECT != (int)TCX_REDIRECT);
- ret = rtnl_link_register(&netkit_link_ops);
- if (ret)
- return ret;
- ret = register_netdevice_notifier(&netkit_netdev_notifier);
- if (ret)
- rtnl_link_unregister(&netkit_link_ops);
- return ret;
+ return rtnl_link_register(&netkit_link_ops);
}
-static __exit void netkit_mod_exit(void)
+static __exit void netkit_exit(void)
{
- unregister_netdevice_notifier(&netkit_netdev_notifier);
rtnl_link_unregister(&netkit_link_ops);
}
-module_init(netkit_mod_init);
-module_exit(netkit_mod_exit);
+module_init(netkit_init);
+module_exit(netkit_exit);
MODULE_DESCRIPTION("BPF-programmable network device");
MODULE_AUTHOR("Daniel Borkmann <daniel@iogearbox.net>");
MODULE_AUTHOR("Nikolay Aleksandrov <razor@blackwall.org>");
MODULE_LICENSE("GPL");
-MODULE_ALIAS_RTNL_LINK(NETKIT_DRV_NAME);
+MODULE_ALIAS_RTNL_LINK(DRV_NAME);
int register_netdevice(struct net_device *dev);
void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
void unregister_netdevice_many(struct list_head *head);
-
static inline void unregister_netdevice(struct net_device *dev)
{
unregister_netdevice_queue(dev, NULL);
}
-static inline bool unregister_netdevice_queued(const struct net_device *dev)
-{
- return !list_empty(&dev->unreg_list);
-}
-
int netdev_refcnt_read(const struct net_device *dev);
void free_netdev(struct net_device *dev);
* @ndo_queue_get_dma_dev: Get dma device for zero-copy operations to be used
* for this queue. Return NULL on error.
*
- * @ndo_queue_create: Create a new RX queue which can be leased to
- * another queue. Ops on this queue are redirected to the leased
- * queue, e.g. when opening a memory provider. Returns the new
- * queue id on success, or a negative error code on failure.
- *
* Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while
* the interface is closed. @ndo_queue_start and @ndo_queue_stop will only
* be called for an interface which is open.
int idx);
struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev,
int idx);
- int (*ndo_queue_create)(struct net_device *dev);
};
-bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx);
-bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx);
-bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx);
+bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx);
/**
* DOC: Lockless queue stopping / waking helpers.
})
struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx);
-bool netdev_can_create_queue(const struct net_device *dev,
- struct netlink_ext_ack *extack);
-bool netdev_can_lease_queue(const struct net_device *dev,
- struct netlink_ext_ack *extack);
-bool netdev_queue_busy(struct net_device *dev, int idx,
- struct netlink_ext_ack *extack);
-#endif /* _LINUX_NET_QUEUES_H */
+
+#endif
#endif
struct napi_struct *napi;
struct pp_memory_provider_params mp_params;
- struct netdev_rx_queue *lease;
- netdevice_tracker lease_tracker;
} ____cacheline_aligned_in_smp;
/*
}
int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq);
-void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst,
- struct netdev_rx_queue *rxq_src);
-void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst,
- struct netdev_rx_queue *rxq_src);
-bool netif_rx_queue_lease_get_owner(struct net_device **dev, unsigned int *rxq);
-enum netif_lease_dir {
- NETIF_VIRT_TO_PHYS,
- NETIF_PHYS_TO_VIRT,
-};
-
-struct netdev_rx_queue *
-__netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq,
- enum netif_lease_dir dir);
-struct netdev_rx_queue *
-netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq);
-void netif_put_rx_queue_lease_locked(struct net_device *orig_dev,
- struct net_device *dev);
-#endif /* _LINUX_NETDEV_RX_QUEUE_H */
+#endif
void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov);
void net_mp_niov_clear_page_pool(struct net_iov *niov);
-int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
+int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
struct pp_memory_provider_params *p);
int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
const struct pp_memory_provider_params *p,
struct netlink_ext_ack *extack);
-void net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
+void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
struct pp_memory_provider_params *old_p);
void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
const struct pp_memory_provider_params *old_p);
bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc);
u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max);
void xsk_tx_release(struct xsk_buff_pool *pool);
-struct xsk_buff_pool *xsk_get_pool_from_qid(const struct net_device *dev,
+struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
u16 queue_id);
void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool);
void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool);
NETKIT_L3,
};
-enum netkit_pairing {
- NETKIT_DEVICE_PAIR,
- NETKIT_DEVICE_SINGLE,
-};
-
/* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to
* the BPF program if attached. This also means the latter can
* consume the two fields if they were populated earlier.
IFLA_NETKIT_PEER_SCRUB,
IFLA_NETKIT_HEADROOM,
IFLA_NETKIT_TAILROOM,
- IFLA_NETKIT_PAIRING,
__IFLA_NETKIT_MAX,
};
#define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1)
NETDEV_A_QUEUE_DMABUF,
NETDEV_A_QUEUE_IO_URING,
NETDEV_A_QUEUE_XSK,
- NETDEV_A_QUEUE_LEASE,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
};
-enum {
- NETDEV_A_LEASE_IFINDEX = 1,
- NETDEV_A_LEASE_QUEUE,
- NETDEV_A_LEASE_NETNS_ID,
-
- __NETDEV_A_LEASE_MAX,
- NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1)
-};
-
enum {
NETDEV_A_DMABUF_IFINDEX = 1,
NETDEV_A_DMABUF_QUEUES,
NETDEV_CMD_BIND_RX,
NETDEV_CMD_NAPI_SET,
NETDEV_CMD_BIND_TX,
- NETDEV_CMD_QUEUE_CREATE,
__NETDEV_CMD_MAX,
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
return __netdev_put_lock_ops_compat(dev, net);
}
-struct net_device *
-netdev_put_lock(struct net_device *dev, netdevice_tracker *tracker)
-{
- netdev_tracker_free(dev, tracker);
- return __netdev_put_lock(dev, dev_net(dev));
-}
-
struct net_device *
netdev_xa_find_lock(struct net *net, struct net_device *dev,
unsigned long *index)
struct net_device *dev_get_by_napi_id(unsigned int napi_id);
struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net);
-struct net_device *netdev_put_lock(struct net_device *dev,
- netdevice_tracker *tracker);
struct net_device *
netdev_xa_find_lock(struct net *net, struct net_device *dev,
unsigned long *index);
};
/* Common nested types */
-const struct nla_policy netdev_lease_nl_policy[NETDEV_A_LEASE_NETNS_ID + 1] = {
- [NETDEV_A_LEASE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
- [NETDEV_A_LEASE_QUEUE] = NLA_POLICY_NESTED(netdev_queue_id_nl_policy),
- [NETDEV_A_LEASE_NETNS_ID] = { .type = NLA_S32, },
-};
-
const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1] = {
[NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range),
[NETDEV_A_PAGE_POOL_IFINDEX] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range),
[NETDEV_A_DMABUF_FD] = { .type = NLA_U32, },
};
-/* NETDEV_CMD_QUEUE_CREATE - do */
-static const struct nla_policy netdev_queue_create_nl_policy[NETDEV_A_QUEUE_LEASE + 1] = {
- [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
- [NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1),
- [NETDEV_A_QUEUE_LEASE] = NLA_POLICY_NESTED(netdev_lease_nl_policy),
-};
-
/* Ops table for netdev */
static const struct genl_split_ops netdev_nl_ops[] = {
{
.maxattr = NETDEV_A_DMABUF_FD,
.flags = GENL_CMD_CAP_DO,
},
- {
- .cmd = NETDEV_CMD_QUEUE_CREATE,
- .doit = netdev_nl_queue_create_doit,
- .policy = netdev_queue_create_nl_policy,
- .maxattr = NETDEV_A_QUEUE_LEASE,
- .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
- },
};
static const struct genl_multicast_group netdev_nl_mcgrps[] = {
#include <net/netdev_netlink.h>
/* Common nested types */
-extern const struct nla_policy netdev_lease_nl_policy[NETDEV_A_LEASE_NETNS_ID + 1];
extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1];
extern const struct nla_policy netdev_queue_id_nl_policy[NETDEV_A_QUEUE_TYPE + 1];
int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info);
-int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info);
enum {
NETDEV_NLGRP_MGMT,
u32 q_idx, u32 q_type, const struct genl_info *info)
{
struct pp_memory_provider_params *params;
- struct net_device *orig_netdev = netdev;
- struct nlattr *nest_lease, *nest_queue;
struct netdev_rx_queue *rxq;
struct netdev_queue *txq;
- u32 lease_q_idx = q_idx;
void *hdr;
hdr = genlmsg_iput(rsp, info);
if (nla_put_napi_id(rsp, rxq->napi))
goto nla_put_failure;
- if (netif_rx_queue_lease_get_owner(&netdev, &lease_q_idx)) {
- struct net *net, *peer_net;
-
- nest_lease = nla_nest_start(rsp, NETDEV_A_QUEUE_LEASE);
- if (!nest_lease)
- goto nla_put_failure;
- nest_queue = nla_nest_start(rsp, NETDEV_A_LEASE_QUEUE);
- if (!nest_queue)
- goto nla_put_failure;
- if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, lease_q_idx))
- goto nla_put_failure;
- if (nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type))
- goto nla_put_failure;
- nla_nest_end(rsp, nest_queue);
- if (nla_put_u32(rsp, NETDEV_A_LEASE_IFINDEX,
- READ_ONCE(netdev->ifindex)))
- goto nla_put_failure;
- rcu_read_lock();
- peer_net = dev_net_rcu(netdev);
- net = dev_net_rcu(orig_netdev);
- if (!net_eq(net, peer_net)) {
- s32 id = peernet2id_alloc(net, peer_net, GFP_ATOMIC);
-
- if (nla_put_s32(rsp, NETDEV_A_LEASE_NETNS_ID, id))
- goto nla_put_failure_unlock;
- }
- rcu_read_unlock();
- nla_nest_end(rsp, nest_lease);
- netdev = orig_netdev;
- }
-
params = &rxq->mp_params;
if (params->mp_ops &&
params->mp_ops->nl_fill(params->mp_priv, rsp, rxq))
return 0;
-nla_put_failure_unlock:
- rcu_read_unlock();
nla_put_failure:
genlmsg_cancel(rsp, hdr);
return -EMSGSIZE;
return err;
}
-int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info)
-{
- const int qmaxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1;
- const int lmaxtype = ARRAY_SIZE(netdev_lease_nl_policy) - 1;
- int err, ifindex, ifindex_lease, queue_id, queue_id_lease;
- struct nlattr *qtb[ARRAY_SIZE(netdev_queue_id_nl_policy)];
- struct nlattr *ltb[ARRAY_SIZE(netdev_lease_nl_policy)];
- struct netdev_rx_queue *rxq, *rxq_lease;
- struct net_device *dev, *dev_lease;
- netdevice_tracker dev_tracker;
- struct nlattr *nest;
- struct sk_buff *rsp;
- void *hdr;
-
- if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX) ||
- GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) ||
- GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_LEASE))
- return -EINVAL;
- if (nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]) !=
- NETDEV_QUEUE_TYPE_RX) {
- NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QUEUE_TYPE]);
- return -EINVAL;
- }
-
- ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
-
- nest = info->attrs[NETDEV_A_QUEUE_LEASE];
- err = nla_parse_nested(ltb, lmaxtype, nest,
- netdev_lease_nl_policy, info->extack);
- if (err < 0)
- return err;
- if (NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_IFINDEX) ||
- NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_QUEUE))
- return -EINVAL;
- if (ltb[NETDEV_A_LEASE_NETNS_ID]) {
- NL_SET_BAD_ATTR(info->extack, ltb[NETDEV_A_LEASE_NETNS_ID]);
- return -EINVAL;
- }
-
- ifindex_lease = nla_get_u32(ltb[NETDEV_A_LEASE_IFINDEX]);
-
- nest = ltb[NETDEV_A_LEASE_QUEUE];
- err = nla_parse_nested(qtb, qmaxtype, nest,
- netdev_queue_id_nl_policy, info->extack);
- if (err < 0)
- return err;
- if (NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_ID) ||
- NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_TYPE))
- return -EINVAL;
- if (nla_get_u32(qtb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
- NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_TYPE]);
- return -EINVAL;
- }
- if (ifindex == ifindex_lease) {
- NL_SET_ERR_MSG(info->extack,
- "Lease ifindex cannot be the same as queue creation ifindex");
- return -EINVAL;
- }
-
- queue_id_lease = nla_get_u32(qtb[NETDEV_A_QUEUE_ID]);
-
- rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!rsp)
- return -ENOMEM;
-
- hdr = genlmsg_iput(rsp, info);
- if (!hdr) {
- err = -EMSGSIZE;
- goto err_genlmsg_free;
- }
-
- /* Locking order is always from the virtual to the physical device,
- * which is also the order in which applications open the memory
- * provider later on.
- */
- dev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
- if (!dev) {
- err = -ENODEV;
- goto err_genlmsg_free;
- }
- if (!netdev_can_create_queue(dev, info->extack)) {
- err = -EINVAL;
- goto err_unlock_dev;
- }
-
- dev_lease = netdev_get_by_index(genl_info_net(info), ifindex_lease,
- &dev_tracker, GFP_KERNEL);
- if (!dev_lease) {
- err = -ENODEV;
- goto err_unlock_dev;
- }
- if (!netdev_can_lease_queue(dev_lease, info->extack)) {
- netdev_put(dev_lease, &dev_tracker);
- err = -EINVAL;
- goto err_unlock_dev;
- }
-
- dev_lease = netdev_put_lock(dev_lease, &dev_tracker);
- if (!dev_lease) {
- err = -ENODEV;
- goto err_unlock_dev;
- }
- if (queue_id_lease >= dev_lease->real_num_rx_queues) {
- err = -ERANGE;
- NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_ID]);
- goto err_unlock_dev_lease;
- }
- if (netdev_queue_busy(dev_lease, queue_id_lease, info->extack)) {
- err = -EBUSY;
- goto err_unlock_dev_lease;
- }
-
- rxq_lease = __netif_get_rx_queue(dev_lease, queue_id_lease);
- rxq = __netif_get_rx_queue(dev, dev->real_num_rx_queues - 1);
-
- if (rxq->lease && rxq->lease->dev != dev_lease) {
- err = -EOPNOTSUPP;
- NL_SET_ERR_MSG(info->extack,
- "Leasing multiple queues from different devices not supported");
- goto err_unlock_dev_lease;
- }
-
- err = queue_id = dev->queue_mgmt_ops->ndo_queue_create(dev);
- if (err < 0) {
- NL_SET_ERR_MSG(info->extack,
- "Device is unable to create a new queue");
- goto err_unlock_dev_lease;
- }
-
- rxq = __netif_get_rx_queue(dev, queue_id);
- netdev_rx_queue_lease(rxq, rxq_lease);
-
- nla_put_u32(rsp, NETDEV_A_QUEUE_ID, queue_id);
- genlmsg_end(rsp, hdr);
-
- netdev_unlock(dev_lease);
- netdev_unlock(dev);
-
- return genlmsg_reply(rsp, info);
-
-err_unlock_dev_lease:
- netdev_unlock(dev_lease);
-err_unlock_dev:
- netdev_unlock(dev);
-err_genlmsg_free:
- nlmsg_free(rsp);
- return err;
-}
-
void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv)
{
INIT_LIST_HEAD(&priv->bindings);
// SPDX-License-Identifier: GPL-2.0-or-later
#include <net/netdev_queues.h>
-#include <net/netdev_rx_queue.h>
-#include <net/xdp_sock_drv.h>
/**
* netdev_queue_get_dma_dev() - get dma device for zero-copy operations
* @dev: net_device
* @idx: queue index
*
- * Get dma device for zero-copy operations to be used for this queue. If the
- * queue is leased to a physical queue, we retrieve the latter's dma device.
+ * Get dma device for zero-copy operations to be used for this queue.
* When such device is not available or valid, the function will return NULL.
*
* Return: Device or NULL on error
*/
struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx)
{
- const struct netdev_queue_mgmt_ops *queue_ops;
+ const struct netdev_queue_mgmt_ops *queue_ops = dev->queue_mgmt_ops;
struct device *dma_dev;
- if (idx < dev->real_num_rx_queues) {
- struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, idx);
-
- if (rxq->lease) {
- rxq = rxq->lease;
- dev = rxq->dev;
- idx = get_netdev_rx_queue_index(rxq);
- }
- }
-
- queue_ops = dev->queue_mgmt_ops;
-
if (queue_ops && queue_ops->ndo_queue_get_dma_dev)
dma_dev = queue_ops->ndo_queue_get_dma_dev(dev, idx);
else
return dma_dev && dma_dev->dma_mask ? dma_dev : NULL;
}
-bool netdev_can_create_queue(const struct net_device *dev,
- struct netlink_ext_ack *extack)
-{
- if (dev->dev.parent) {
- NL_SET_ERR_MSG(extack, "Device is not a virtual device");
- return false;
- }
- if (!dev->queue_mgmt_ops ||
- !dev->queue_mgmt_ops->ndo_queue_create) {
- NL_SET_ERR_MSG(extack, "Device does not support queue creation");
- return false;
- }
- if (dev->real_num_rx_queues < 1 ||
- dev->real_num_tx_queues < 1) {
- NL_SET_ERR_MSG(extack, "Device must have at least one real queue");
- return false;
- }
- return true;
-}
-
-bool netdev_can_lease_queue(const struct net_device *dev,
- struct netlink_ext_ack *extack)
-{
- if (!dev->dev.parent) {
- NL_SET_ERR_MSG(extack, "Lease device is a virtual device");
- return false;
- }
- if (!netif_device_present(dev)) {
- NL_SET_ERR_MSG(extack, "Lease device has been removed from the system");
- return false;
- }
- if (!dev->queue_mgmt_ops) {
- NL_SET_ERR_MSG(extack, "Lease device does not support queue management operations");
- return false;
- }
- return true;
-}
-
-bool netdev_queue_busy(struct net_device *dev, int idx,
- struct netlink_ext_ack *extack)
-{
- if (netif_rxq_is_leased(dev, idx)) {
- NL_SET_ERR_MSG(extack, "Lease device queue is already leased");
- return true;
- }
- if (xsk_get_pool_from_qid(dev, idx)) {
- NL_SET_ERR_MSG(extack, "Lease device queue in use by AF_XDP");
- return true;
- }
- if (netif_rxq_has_mp(dev, idx)) {
- NL_SET_ERR_MSG(extack, "Lease device queue in use by memory provider");
- return true;
- }
- return false;
-}
#include "page_pool_priv.h"
-void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst,
- struct netdev_rx_queue *rxq_src)
-{
- netdev_assert_locked(rxq_src->dev);
- netdev_assert_locked(rxq_dst->dev);
-
- netdev_hold(rxq_src->dev, &rxq_src->lease_tracker, GFP_KERNEL);
-
- WRITE_ONCE(rxq_src->lease, rxq_dst);
- WRITE_ONCE(rxq_dst->lease, rxq_src);
-}
-
-void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst,
- struct netdev_rx_queue *rxq_src)
-{
- netdev_assert_locked(rxq_dst->dev);
- netdev_assert_locked(rxq_src->dev);
-
- WRITE_ONCE(rxq_src->lease, NULL);
- WRITE_ONCE(rxq_dst->lease, NULL);
-
- netdev_put(rxq_src->dev, &rxq_src->lease_tracker);
-}
-
-bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx)
-{
- if (rxq_idx < dev->real_num_rx_queues)
- return READ_ONCE(__netif_get_rx_queue(dev, rxq_idx)->lease);
- return false;
-}
-
-static bool netif_lease_dir_ok(const struct net_device *dev,
- enum netif_lease_dir dir)
-{
- if (dir == NETIF_VIRT_TO_PHYS && !dev->dev.parent)
- return true;
- if (dir == NETIF_PHYS_TO_VIRT && dev->dev.parent)
- return true;
- return false;
-}
-
-struct netdev_rx_queue *
-__netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq_idx,
- enum netif_lease_dir dir)
-{
- struct net_device *orig_dev = *dev;
- struct netdev_rx_queue *rxq = __netif_get_rx_queue(orig_dev, *rxq_idx);
-
- if (rxq->lease) {
- if (!netif_lease_dir_ok(orig_dev, dir))
- return NULL;
- rxq = rxq->lease;
- *rxq_idx = get_netdev_rx_queue_index(rxq);
- *dev = rxq->dev;
- }
- return rxq;
-}
-
-struct netdev_rx_queue *
-netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq_idx)
-{
- struct net_device *orig_dev = *dev;
- struct netdev_rx_queue *rxq;
-
- /* Locking order is always from the virtual to the physical device;
- * see netdev_nl_queue_create_doit().
- */
- netdev_ops_assert_locked(orig_dev);
- rxq = __netif_get_rx_queue_lease(dev, rxq_idx, NETIF_VIRT_TO_PHYS);
- if (rxq && orig_dev != *dev)
- netdev_lock(*dev);
- return rxq;
-}
-
-void netif_put_rx_queue_lease_locked(struct net_device *orig_dev,
- struct net_device *dev)
-{
- if (orig_dev != dev)
- netdev_unlock(dev);
-}
-
-bool netif_rx_queue_lease_get_owner(struct net_device **dev,
- unsigned int *rxq_idx)
-{
- struct net_device *orig_dev = *dev;
- struct netdev_rx_queue *rxq;
-
- /* The physical device needs to be locked. If there is indeed a lease,
- * then the virtual device holds a reference on the physical device
- * and the lease stays active until the virtual device is torn down.
- * When queues get {un,}leased both devices are always locked.
- */
- netdev_ops_assert_locked(orig_dev);
- rxq = __netif_get_rx_queue_lease(dev, rxq_idx, NETIF_PHYS_TO_VIRT);
- if (rxq && orig_dev != *dev)
- return true;
- return false;
-}
-
/* See also page_pool_is_unreadable() */
-bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx)
+bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx)
{
- if (rxq_idx < dev->real_num_rx_queues)
- return __netif_get_rx_queue(dev, rxq_idx)->mp_params.mp_ops;
- return false;
-}
-EXPORT_SYMBOL(netif_rxq_has_unreadable_mp);
+ struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, idx);
-bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx)
-{
- if (rxq_idx < dev->real_num_rx_queues)
- return __netif_get_rx_queue(dev, rxq_idx)->mp_params.mp_priv;
- return false;
+ return !!rxq->mp_params.mp_ops;
}
+EXPORT_SYMBOL(netif_rxq_has_unreadable_mp);
int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
{
const struct pp_memory_provider_params *p,
struct netlink_ext_ack *extack)
{
- struct net_device *orig_dev = dev;
struct netdev_rx_queue *rxq;
int ret;
if (!netdev_need_ops_lock(dev))
return -EOPNOTSUPP;
+
if (rxq_idx >= dev->real_num_rx_queues) {
NL_SET_ERR_MSG(extack, "rx queue index out of range");
return -ERANGE;
}
-
rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
- rxq = netif_get_rx_queue_lease_locked(&dev, &rxq_idx);
- if (!rxq) {
- NL_SET_ERR_MSG(extack, "rx queue peered to a virtual netdev");
- return -EBUSY;
- }
- if (!dev->dev.parent) {
- NL_SET_ERR_MSG(extack, "rx queue is mapped to a virtual netdev");
- ret = -EBUSY;
- goto out;
- }
+
if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
if (dev->cfg->hds_thresh) {
NL_SET_ERR_MSG(extack, "hds-thresh is not zero");
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
if (dev_xdp_prog_count(dev)) {
NL_SET_ERR_MSG(extack, "unable to custom memory provider to device with XDP program attached");
- ret = -EEXIST;
- goto out;
+ return -EEXIST;
}
+
+ rxq = __netif_get_rx_queue(dev, rxq_idx);
if (rxq->mp_params.mp_ops) {
NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
- ret = -EEXIST;
- goto out;
+ return -EEXIST;
}
#ifdef CONFIG_XDP_SOCKETS
if (rxq->pool) {
NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
- ret = -EBUSY;
- goto out;
+ return -EBUSY;
}
#endif
+
rxq->mp_params = *p;
ret = netdev_rx_queue_restart(dev, rxq_idx);
if (ret) {
rxq->mp_params.mp_ops = NULL;
rxq->mp_params.mp_priv = NULL;
}
-out:
- netif_put_rx_queue_lease_locked(orig_dev, dev);
return ret;
}
return ret;
}
-void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
+void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
const struct pp_memory_provider_params *old_p)
{
- struct net_device *orig_dev = dev;
struct netdev_rx_queue *rxq;
int err;
- if (WARN_ON_ONCE(rxq_idx >= dev->real_num_rx_queues))
+ if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
return;
- rxq = netif_get_rx_queue_lease_locked(&dev, &rxq_idx);
- if (WARN_ON_ONCE(!rxq))
- return;
+ rxq = __netif_get_rx_queue(dev, ifq_idx);
/* Callers holding a netdev ref may get here after we already
* went thru shutdown via dev_memory_provider_uninstall().
*/
if (dev->reg_state > NETREG_REGISTERED &&
!rxq->mp_params.mp_ops)
- goto out;
+ return;
if (WARN_ON_ONCE(rxq->mp_params.mp_ops != old_p->mp_ops ||
rxq->mp_params.mp_priv != old_p->mp_priv))
- goto out;
+ return;
rxq->mp_params.mp_ops = NULL;
rxq->mp_params.mp_priv = NULL;
- err = netdev_rx_queue_restart(dev, rxq_idx);
+ err = netdev_rx_queue_restart(dev, ifq_idx);
WARN_ON(err && err != -ENETDOWN);
-out:
- netif_put_rx_queue_lease_locked(orig_dev, dev);
}
-void net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
+void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
struct pp_memory_provider_params *old_p)
{
netdev_lock(dev);
- __net_mp_close_rxq(dev, rxq_idx, old_p);
+ __net_mp_close_rxq(dev, ifq_idx, old_p);
netdev_unlock(dev);
}
// SPDX-License-Identifier: GPL-2.0-only
-#include <net/netdev_queues.h>
+#include <net/xdp_sock_drv.h>
#include "netlink.h"
#include "common.h"
if (ret)
return ret;
- /* ensure channels are not busy at the moment */
+ /* Disabling channels, query zero-copy AF_XDP sockets */
from_channel = channels.combined_count +
min(channels.rx_count, channels.tx_count);
- for (i = from_channel; i < old_total; i++) {
- if (netdev_queue_busy(dev, i, NULL)) {
- GENL_SET_ERR_MSG(info,
- "requested channel counts are too low due to busy queues (AF_XDP or queue leasing)");
+ for (i = from_channel; i < old_total; i++)
+ if (xsk_get_pool_from_qid(dev, i)) {
+ GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing zerocopy AF_XDP sockets");
return -EINVAL;
}
- }
ret = dev->ethtool_ops->set_channels(dev, &channels);
return ret < 0 ? ret : 1;
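User-visible effect of the check above, sketched with the selftests' ethtool helper (interface name is illustrative; the helper is assumed to raise on a non-zero exit):

    from lib.py import ethtool, CmdExitFailure

    try:
        # Shrinking combined channels underneath a busy queue is rejected.
        ethtool("-L eth0 combined 1")
    except CmdExitFailure:
        pass  # extack explains the busy (AF_XDP) queue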
#include <linux/net.h>
#include <linux/pm_runtime.h>
#include <linux/utsname.h>
-#include <linux/ethtool_netlink.h>
#include <net/devlink.h>
#include <net/ipv6.h>
+#include <net/xdp_sock_drv.h>
#include <net/flow_offload.h>
#include <net/netdev_lock.h>
-#include <net/netdev_queues.h>
-
+#include <linux/ethtool_netlink.h>
#include "common.h"
/* State held across locks and calls for commands which have devlink fallback */
if (ret)
return ret;
- /* Disabling channels, query busy queues (AF_XDP, queue leasing) */
+ /* Disabling channels, query zero-copy AF_XDP sockets */
from_channel = channels.combined_count +
min(channels.rx_count, channels.tx_count);
to_channel = curr.combined_count + max(curr.rx_count, curr.tx_count);
for (i = from_channel; i < to_channel; i++)
- if (netdev_queue_busy(dev, i, NULL))
+ if (xsk_get_pool_from_qid(dev, i))
return -EINVAL;
ret = dev->ethtool_ops->set_channels(dev, &channels);
#include <linux/netdevice.h>
#include <linux/rculist.h>
#include <linux/vmalloc.h>
-
-#include <net/netdev_queues.h>
#include <net/xdp_sock_drv.h>
#include <net/busy_poll.h>
#include <net/netdev_lock.h>
}
EXPORT_SYMBOL(xsk_uses_need_wakeup);
-struct xsk_buff_pool *xsk_get_pool_from_qid(const struct net_device *dev,
+struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
u16 queue_id)
{
if (queue_id < dev->real_num_rx_queues)
void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id)
{
- struct net_device *orig_dev = dev;
- unsigned int id = queue_id;
-
- if (id < dev->real_num_rx_queues)
- WARN_ON_ONCE(!netif_get_rx_queue_lease_locked(&dev, &id));
-
- if (id < dev->real_num_rx_queues)
- dev->_rx[id].pool = NULL;
- if (id < dev->real_num_tx_queues)
- dev->_tx[id].pool = NULL;
-
- netif_put_rx_queue_lease_locked(orig_dev, dev);
+ if (queue_id < dev->num_rx_queues)
+ dev->_rx[queue_id].pool = NULL;
+ if (queue_id < dev->num_tx_queues)
+ dev->_tx[queue_id].pool = NULL;
}
/* The buffer pool is stored both in the _rx struct and the _tx struct as we do
int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
u16 queue_id)
{
- struct net_device *orig_dev = dev;
- unsigned int id = queue_id;
- int ret = 0;
-
- if (id >= max(dev->real_num_rx_queues,
- dev->real_num_tx_queues))
+ if (queue_id >= max_t(unsigned int,
+ dev->real_num_rx_queues,
+ dev->real_num_tx_queues))
return -EINVAL;
- if (id < dev->real_num_rx_queues) {
- if (!netif_get_rx_queue_lease_locked(&dev, &id))
- return -EBUSY;
- if (xsk_get_pool_from_qid(dev, id)) {
- ret = -EBUSY;
- goto out;
- }
- }
- if (id < dev->real_num_rx_queues)
- dev->_rx[id].pool = pool;
- if (id < dev->real_num_tx_queues)
- dev->_tx[id].pool = pool;
-out:
- netif_put_rx_queue_lease_locked(orig_dev, dev);
- return ret;
+ if (queue_id < dev->real_num_rx_queues)
+ dev->_rx[queue_id].pool = pool;
+ if (queue_id < dev->real_num_tx_queues)
+ dev->_tx[queue_id].pool = pool;
+
+ return 0;
}
static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len,
return false;
}
-static bool xsk_dev_queue_valid(const struct xdp_sock *xs,
- const struct xdp_rxq_info *info)
-{
- struct net_device *dev = xs->dev;
- u32 queue_index = xs->queue_id;
- struct netdev_rx_queue *rxq;
-
- if (info->dev == dev &&
- info->queue_index == queue_index)
- return true;
-
- if (queue_index < dev->real_num_rx_queues) {
- rxq = READ_ONCE(__netif_get_rx_queue(dev, queue_index)->lease);
- if (!rxq)
- return false;
-
- dev = rxq->dev;
- queue_index = get_netdev_rx_queue_index(rxq);
-
- return info->dev == dev &&
- info->queue_index == queue_index;
- }
- return false;
-}
-
static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
if (!xsk_is_bound(xs))
return -ENXIO;
- if (!xsk_dev_queue_valid(xs, xdp->rxq))
+
+ if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
+
if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) {
xs->rx_dropped++;
return -ENOSPC;
NETDEV_A_QUEUE_DMABUF,
NETDEV_A_QUEUE_IO_URING,
NETDEV_A_QUEUE_XSK,
- NETDEV_A_QUEUE_LEASE,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
};
-enum {
- NETDEV_A_LEASE_IFINDEX = 1,
- NETDEV_A_LEASE_QUEUE,
- NETDEV_A_LEASE_NETNS_ID,
-
- __NETDEV_A_LEASE_MAX,
- NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1)
-};
-
enum {
NETDEV_A_DMABUF_IFINDEX = 1,
NETDEV_A_DMABUF_QUEUES,
NETDEV_CMD_BIND_RX,
NETDEV_CMD_NAPI_SET,
NETDEV_CMD_BIND_TX,
- NETDEV_CMD_QUEUE_CREATE,
__NETDEV_CMD_MAX,
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
Local and remote endpoint IP addresses.
-LOCAL_PREFIX_V4, LOCAL_PREFIX_V6
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Local IP prefix/subnet which can be used to allocate extra IP addresses (for
-network namespaces behind macvlan, veth, netkit devices). The DUT must be
-reachable at these addresses from the endpoint.
-
REMOTE_TYPE
~~~~~~~~~~~
irq.py \
loopback.sh \
nic_timestamp.py \
- nk_netns.py \
- nk_qlease.py \
pp_alloc_fail.py \
rss_api.py \
rss_ctx.py \
"""
Driver test environment (hardware-only tests).
NetDrvEnv and NetDrvEpEnv are the main environment classes.
-NetDrvContEnv extends NetDrvEpEnv with netkit container support.
Former is for local host only tests, latter creates / connects
to a remote endpoint. See NIPA wiki for more information about
running and writing driver tests.
from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none
from drivers.net.lib.py import GenerateTraffic, Remote, Iperf3Runner
- from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv, NetDrvContEnv
+ from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv
__all__ = ["NetNS", "NetNSEnter", "NetdevSimDev",
"EthtoolFamily", "NetdevFamily", "NetshaperFamily",
"ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt",
"ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt",
"ksft_not_none", "ksft_not_none",
- "NetDrvEnv", "NetDrvEpEnv", "NetDrvContEnv", "GenerateTraffic",
- "Remote", "Iperf3Runner"]
+ "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote",
+ "Iperf3Runner"]
except ModuleNotFoundError as e:
print("Failed importing `net` library from kernel sources")
print(str(e))
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
-#include <linux/pkt_cls.h>
-#include <linux/if_ether.h>
-#include <linux/ipv6.h>
-#include <linux/in6.h>
-#include <bpf/bpf_endian.h>
-#include <bpf/bpf_helpers.h>
-
-#define TC_ACT_OK 0
-#define ETH_P_IPV6 0x86DD
-
-#define ctx_ptr(field) ((void *)(long)(field))
-
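-/* Compares only the upper 64 bits of the addresses, i.e. a /64 prefix match. */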
-#define v6_p64_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \
- a.s6_addr32[1] == b.s6_addr32[1])
-
-volatile __u32 netkit_ifindex;
-volatile __u8 ipv6_prefix[16];
-
-SEC("tc/ingress")
-int tc_redirect_peer(struct __sk_buff *skb)
-{
- void *data_end = ctx_ptr(skb->data_end);
- void *data = ctx_ptr(skb->data);
- struct in6_addr *peer_addr;
- struct ipv6hdr *ip6h;
- struct ethhdr *eth;
-
- peer_addr = (struct in6_addr *)ipv6_prefix;
-
- if (skb->protocol != bpf_htons(ETH_P_IPV6))
- return TC_ACT_OK;
-
- eth = data;
- if ((void *)(eth + 1) > data_end)
- return TC_ACT_OK;
-
- ip6h = data + sizeof(struct ethhdr);
- if ((void *)(ip6h + 1) > data_end)
- return TC_ACT_OK;
-
- if (!v6_p64_equal(ip6h->daddr, (*peer_addr)))
- return TC_ACT_OK;
-
- return bpf_redirect_peer(netkit_ifindex, 0);
-}
-
-char __license[] SEC("license") = "GPL";
+++ /dev/null
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0
-
-from lib.py import ksft_run, ksft_exit
-from lib.py import NetDrvContEnv
-from lib.py import cmd
-
-
-def test_ping(cfg) -> None:
- cfg.require_ipver("6")
-
- cmd(f"ping -c 1 -W5 {cfg.nk_guest_ipv6}", host=cfg.remote)
- cmd(f"ping -c 1 -W5 {cfg.remote_addr_v['6']}", ns=cfg.netns)
-
-
-def main() -> None:
- with NetDrvContEnv(__file__) as cfg:
- ksft_run([test_ping], args=(cfg,))
- ksft_exit()
-
-
-if __name__ == "__main__":
- main()
+++ /dev/null
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0
-
-import re
-from os import path
-from lib.py import ksft_run, ksft_exit
-from lib.py import NetDrvContEnv
-from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen
-
-
-def create_rss_ctx(cfg):
- output = ethtool(f"-X {cfg.ifname} context new start {cfg.src_queue} equal 1").stdout
- values = re.search(r'New RSS context is (\d+)', output).group(1)
- return int(values)
-
-
-def set_flow_rule(cfg):
- output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {cfg.port} action {cfg.src_queue}").stdout
- values = re.search(r'ID (\d+)', output).group(1)
- return int(values)
-
-
-def set_flow_rule_rss(cfg, rss_ctx_id):
- output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {cfg.port} context {rss_ctx_id}").stdout
- values = re.search(r'ID (\d+)', output).group(1)
- return int(values)
-
-
-def test_iou_zcrx(cfg) -> None:
- cfg.require_ipver('6')
-
- ethtool(f"-X {cfg.ifname} equal {cfg.src_queue}")
- defer(ethtool, f"-X {cfg.ifname} default")
-
- flow_rule_id = set_flow_rule(cfg)
- defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
-
- rx_cmd = f"ip netns exec {cfg.netns.name} {cfg.bin_local} -s -p {cfg.port} -i {cfg._nk_guest_ifname} -q {cfg.nk_queue}"
- tx_cmd = f"{cfg.bin_remote} -c -h {cfg.nk_guest_ipv6} -p {cfg.port} -l 12840"
- with bkg(rx_cmd, exit_wait=True):
- wait_port_listen(cfg.port, proto="tcp", ns=cfg.netns)
- cmd(tx_cmd, host=cfg.remote)
-
-
-def main() -> None:
- with NetDrvContEnv(__file__, lease=True) as cfg:
- cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx")
- cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
- cfg.port = rand_port()
- ksft_run([test_iou_zcrx], args=(cfg,))
- ksft_exit()
-
-
-if __name__ == "__main__":
- main()
"""
Driver test environment.
NetDrvEnv and NetDrvEpEnv are the main environment classes.
-NetDrvContEnv extends NetDrvEpEnv with netkit container support.
Former is for local host only tests, latter creates / connects
to a remote endpoint. See NIPA wiki for more information about
running and writing driver tests.
"ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt",
"ksft_not_none", "ksft_not_none"]
- from .env import NetDrvEnv, NetDrvEpEnv, NetDrvContEnv
+ from .env import NetDrvEnv, NetDrvEpEnv
from .load import GenerateTraffic, Iperf3Runner
from .remote import Remote
- __all__ += ["NetDrvEnv", "NetDrvEpEnv", "NetDrvContEnv", "GenerateTraffic",
- "Remote", "Iperf3Runner"]
+ __all__ += ["NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote",
+ "Iperf3Runner"]
except ModuleNotFoundError as e:
print("Failed importing `net` library from kernel sources")
print(str(e))
# SPDX-License-Identifier: GPL-2.0
-import ipaddress
import os
-import re
import time
from pathlib import Path
from lib.py import KsftSkipEx, KsftXfailEx
from lib.py import ksft_setup, wait_file
from lib.py import cmd, ethtool, ip, CmdExitFailure
from lib.py import NetNS, NetdevSimDev
-from lib.py import NetdevFamily, EthtoolFamily
from .remote import Remote
-from . import bpftool
class NetDrvEnvBase:
data.get('stats-block-usecs', 0) / 1000 / 1000
time.sleep(self._stats_settle_time)
-
-
-class NetDrvContEnv(NetDrvEpEnv):
- """
- Class for an environment with a netkit pair setup for forwarding traffic
- between the physical interface and a network namespace.
- """
-
- def __init__(self, src_path, lease=False, **kwargs):
- super().__init__(src_path, **kwargs)
-
- self.require_ipver("6")
- local_prefix = self.env.get("LOCAL_PREFIX_V6")
- if not local_prefix:
- raise KsftSkipEx("LOCAL_PREFIX_V6 required")
-
- self.netdevnl = NetdevFamily()
- self.ethnl = EthtoolFamily()
-
- local_prefix = local_prefix.split("/")[0].rstrip(":")
- self.ipv6_prefix = f"{local_prefix}::"
- self.nk_host_ipv6 = f"{local_prefix}::2:1"
- self.nk_guest_ipv6 = f"{local_prefix}::2:2"
-
- self.netns = None
- self._nk_host_ifname = None
- self._nk_guest_ifname = None
- self._tc_attached = False
- self._bpf_prog_pref = None
- self._bpf_prog_id = None
- self._leased = False
-
- nk_rxqueues = 1
- if lease:
- nk_rxqueues = 2
- ip(f"link add type netkit mode l2 forward peer forward numrxqueues {nk_rxqueues}")
-
- all_links = ip("-d link show", json=True)
- netkit_links = [link for link in all_links
- if link.get('linkinfo', {}).get('info_kind') == 'netkit'
- and 'UP' not in link.get('flags', [])]
-
- if len(netkit_links) != 2:
- raise KsftSkipEx("Failed to create netkit pair")
-
- netkit_links.sort(key=lambda x: x['ifindex'])
- self._nk_host_ifname = netkit_links[1]['ifname']
- self._nk_guest_ifname = netkit_links[0]['ifname']
- self.nk_host_ifindex = netkit_links[1]['ifindex']
- self.nk_guest_ifindex = netkit_links[0]['ifindex']
-
- if lease:
- self._lease_queues()
-
- self._setup_ns()
- self._attach_bpf()
-
- def __del__(self):
- if self._tc_attached:
- cmd(f"tc filter del dev {self.ifname} ingress pref {self._bpf_prog_pref}")
- self._tc_attached = False
-
- if self._nk_host_ifname:
- cmd(f"ip link del dev {self._nk_host_ifname}")
- self._nk_host_ifname = None
- self._nk_guest_ifname = None
-
- if self.netns:
- del self.netns
- self.netns = None
-
- if self._leased:
- self.ethnl.rings_set({'header': {'dev-index': self.ifindex},
- 'tcp-data-split': 'unknown',
- 'hds-thresh': self._hds_thresh,
- 'rx': self._rx_rings})
- self._leased = False
-
- super().__del__()
-
- def _lease_queues(self):
- channels = self.ethnl.channels_get({'header': {'dev-index': self.ifindex}})
- channels = channels['combined-count']
- if channels < 2:
- raise KsftSkipEx('Test requires NETIF with at least 2 combined channels')
-
- rings = self.ethnl.rings_get({'header': {'dev-index': self.ifindex}})
- self._rx_rings = rings['rx']
- self._hds_thresh = rings.get('hds-thresh', 0)
- self.ethnl.rings_set({'header': {'dev-index': self.ifindex},
- 'tcp-data-split': 'enabled',
- 'hds-thresh': 0,
- 'rx': 64})
- self.src_queue = channels - 1
- bind_result = self.netdevnl.queue_create(
- {
- "ifindex": self.nk_guest_ifindex,
- "type": "rx",
- "lease": {
- "ifindex": self.ifindex,
- "queue": {"id": self.src_queue, "type": "rx"},
- },
- }
- )
- self.nk_queue = bind_result['id']
- self._leased = True
-
- def _setup_ns(self):
- self.netns = NetNS()
- ip(f"link set dev {self._nk_guest_ifname} netns {self.netns.name}")
- ip(f"link set dev {self._nk_host_ifname} up")
- ip(f"-6 addr add fe80::1/64 dev {self._nk_host_ifname} nodad")
- ip(f"-6 route add {self.nk_guest_ipv6}/128 via fe80::2 dev {self._nk_host_ifname}")
-
- ip("link set lo up", ns=self.netns)
- ip(f"link set dev {self._nk_guest_ifname} up", ns=self.netns)
- ip(f"-6 addr add fe80::2/64 dev {self._nk_guest_ifname}", ns=self.netns)
- ip(f"-6 addr add {self.nk_guest_ipv6}/64 dev {self._nk_guest_ifname} nodad", ns=self.netns)
- ip(f"-6 route add default via fe80::1 dev {self._nk_guest_ifname}", ns=self.netns)
-
- def _attach_bpf(self):
- bpf_obj = self.test_dir / "nk_forward.bpf.o"
- if not bpf_obj.exists():
- raise KsftSkipEx("BPF prog not found")
-
- cmd(f"tc filter add dev {self.ifname} ingress bpf obj {bpf_obj} sec tc/ingress direct-action")
- self._tc_attached = True
-
- tc_info = cmd(f"tc filter show dev {self.ifname} ingress").stdout
- match = re.search(r'pref (\d+).*nk_forward\.bpf.*id (\d+)', tc_info)
- if not match:
- raise Exception("Failed to get BPF prog ID")
- self._bpf_prog_pref = int(match.group(1))
- self._bpf_prog_id = int(match.group(2))
-
- prog_info = bpftool(f"prog show id {self._bpf_prog_id}", json=True)
- map_ids = prog_info.get("map_ids", [])
-
- bss_map_id = None
- for map_id in map_ids:
- map_info = bpftool(f"map show id {map_id}", json=True)
- if map_info.get("name").endswith("bss"):
- bss_map_id = map_id
-
- if bss_map_id is None:
- raise Exception("Failed to find .bss map")
-
- ipv6_addr = ipaddress.IPv6Address(self.ipv6_prefix)
- ipv6_bytes = ipv6_addr.packed
- ifindex_bytes = self.nk_host_ifindex.to_bytes(4, byteorder='little')
- value = ipv6_bytes + ifindex_bytes
- value_hex = ' '.join(f'{b:02x}' for b in value)
- bpftool(f"map update id {bss_map_id} key hex 00 00 00 00 value hex {value_hex}")