GVE_PACKET_STATE_TIMED_OUT_COMPL,
};
+enum gve_tx_pending_packet_dqo_type {
+ GVE_TX_PENDING_PACKET_DQO_SKB,
+ GVE_TX_PENDING_PACKET_DQO_XDP_FRAME
+};
+
struct gve_tx_pending_packet_dqo {
- struct sk_buff *skb; /* skb for this packet */
+ union {
+ struct sk_buff *skb;
+ struct xdp_frame *xdpf;
+ };
/* 0th element corresponds to the linear portion of `skb`, should be
* unmapped with `dma_unmap_single`.
/* Identifies the current state of the packet as defined in
* `enum gve_packet_state`.
*/
- u8 state;
+ u8 state : 2;
+
+ /* Identifies the packet type as defined in `enum gve_tx_pending_packet_dqo_type`. */
+ u8 type : 1;
/* If packet is an outstanding miss completion, then the packet is
* freed if the corresponding re-injection completion is not received
/* DQO fields. */
struct {
+ /* Spinlock serializing XDP_TX and ndo_xdp_xmit traffic on this queue */
+ spinlock_t xdp_lock;
+
/* Linked list of gve_tx_pending_packet_dqo. Index into
* pending_packets, or -1 if empty.
*
{
switch (priv->queue_format) {
case GVE_GQI_QPL_FORMAT:
+ case GVE_DQO_RDA_FORMAT:
return true;
default:
return false;
netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev);
int gve_xdp_xmit_gqi(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
+int gve_xdp_xmit_dqo(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx,
void *data, int len, void *frame_p);
void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid);
+int gve_xdp_xmit_one_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
+ struct xdp_frame *xdpf);
bool gve_tx_poll(struct gve_notify_block *block, int budget);
bool gve_xdp_poll(struct gve_notify_block *block, int budget);
int gve_xsk_tx_poll(struct gve_notify_block *block, int budget);
struct net_device *dev,
netdev_features_t features);
bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean);
+bool gve_xdp_poll_dqo(struct gve_notify_block *block);
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget);
int gve_tx_alloc_rings_dqo(struct gve_priv *priv,
struct gve_tx_alloc_rings_cfg *cfg);
struct napi_struct *napi);
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx);
void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx);
+void gve_xdp_tx_flush_dqo(struct gve_priv *priv, u32 xdp_qid);
static inline void
gve_tx_put_doorbell_dqo(const struct gve_priv *priv,
bool reschedule = false;
int work_done = 0;
- if (block->tx)
- reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
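+ /* TX queues with q_num >= tx_cfg.num_queues are XDP transmit queues. */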
+ if (block->tx) {
+ if (block->tx->q_num < priv->tx_cfg.num_queues)
+ reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
+ else
+ reschedule |= gve_xdp_poll_dqo(block);
+ }
if (!budget)
return 0;
{
struct gve_priv *priv = netdev_priv(dev);
- if (gve_is_gqi(priv))
+ if (priv->queue_format == GVE_GQI_QPL_FORMAT)
return gve_xdp_xmit_gqi(dev, n, frames, flags);
+ else if (priv->queue_format == GVE_DQO_RDA_FORMAT)
+ return gve_xdp_xmit_dqo(dev, n, frames, flags);
+
return -EOPNOTSUPP;
}
return -EOPNOTSUPP;
}
- if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
- netdev_warn(dev, "XDP is not supported in mode %d.\n",
- priv->queue_format);
+ if (priv->header_split_enabled) {
+ netdev_warn(dev, "XDP is not supported when header-data split is enabled.\n");
return -EOPNOTSUPP;
}
return GVE_DEFAULT_RX_BUFFER_SIZE;
}
-/* header-split is not supported on non-DQO_RDA yet even if device advertises it */
+/* Header split is only supported on the DQO RDA queue format, and is not
+ * allowed while an XDP program is installed.
+ */
bool gve_header_split_supported(const struct gve_priv *priv)
{
- return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT;
+ return priv->header_buf_size &&
+ priv->queue_format == GVE_DQO_RDA_FORMAT && !priv->xdp_prog;
}
int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
netdev->features ^= NETIF_F_LRO;
+ if (priv->xdp_prog && (netdev->features & NETIF_F_LRO)) {
+ netdev_warn(netdev,
+ "XDP is not supported when LRO is on.\n");
+ err = -EOPNOTSUPP;
+ goto revert_features;
+ }
if (netif_running(netdev)) {
err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
if (err)
xdp_features = NETDEV_XDP_ACT_BASIC;
xdp_features |= NETDEV_XDP_ACT_REDIRECT;
xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
+ } else if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
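+ /* DQO RDA advertises basic XDP and redirect, but not AF_XDP zero-copy. */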
+ xdp_features = NETDEV_XDP_ACT_BASIC;
+ xdp_features |= NETDEV_XDP_ACT_REDIRECT;
} else {
xdp_features = 0;
}
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
+#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
return 0;
}
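+/* XDP_TX path for DQO RDA: convert the XDP buffer to a frame and post it on
+ * the XDP TX queue paired with this RX queue, under the ring's xdp_lock.
+ */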
+static int gve_xdp_tx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct xdp_buff *xdp)
+{
+ struct gve_tx_ring *tx;
+ struct xdp_frame *xdpf;
+ u32 tx_qid;
+ int err;
+
+ xdpf = xdp_convert_buff_to_frame(xdp);
+ if (unlikely(!xdpf))
+ return -ENOSPC;
+
+ tx_qid = gve_xdp_tx_queue_id(priv, rx->q_num);
+ tx = &priv->tx[tx_qid];
+ spin_lock(&tx->dqo_tx.xdp_lock);
+ err = gve_xdp_xmit_one_dqo(priv, tx, xdpf);
+ spin_unlock(&tx->dqo_tx.xdp_lock);
+
+ return err;
+}
+
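+/* Apply the XDP verdict to a received buffer: the buffer is reused when
+ * XDP_TX or XDP_REDIRECT succeeds and returned to the free list otherwise.
+ */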
static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
struct xdp_buff *xdp, struct bpf_prog *xprog,
int xdp_act,
struct gve_rx_buf_state_dqo *buf_state)
{
- u64_stats_update_begin(&rx->statss);
+ int err;
switch (xdp_act) {
case XDP_ABORTED:
case XDP_DROP:
default:
- rx->xdp_actions[xdp_act]++;
+ gve_free_buffer(rx, buf_state);
break;
case XDP_TX:
- rx->xdp_tx_errors++;
+ err = gve_xdp_tx_dqo(priv, rx, xdp);
+ if (unlikely(err))
+ goto err;
+ gve_reuse_buffer(rx, buf_state);
break;
case XDP_REDIRECT:
- rx->xdp_redirect_errors++;
+ err = xdp_do_redirect(priv->dev, xdp, xprog);
+ if (unlikely(err))
+ goto err;
+ gve_reuse_buffer(rx, buf_state);
break;
}
+ u64_stats_update_begin(&rx->statss);
+ if ((u32)xdp_act < GVE_XDP_ACTIONS)
+ rx->xdp_actions[xdp_act]++;
+ u64_stats_update_end(&rx->statss);
+ return;
+err:
+ u64_stats_update_begin(&rx->statss);
+ if (xdp_act == XDP_TX)
+ rx->xdp_tx_errors++;
+ else if (xdp_act == XDP_REDIRECT)
+ rx->xdp_redirect_errors++;
u64_stats_update_end(&rx->statss);
gve_free_buffer(rx, buf_state);
}
/* Returns 0 if descriptor is completed successfully.
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
- struct napi_struct *napi = &block->napi;
- netdev_features_t feat = napi->dev->features;
-
- struct gve_rx_ring *rx = block->rx;
- struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
-
+ struct gve_rx_compl_queue_dqo *complq;
+ struct napi_struct *napi;
+ netdev_features_t feat;
+ struct gve_rx_ring *rx;
+ struct gve_priv *priv;
+ u64 xdp_redirects;
u32 work_done = 0;
u64 bytes = 0;
+ u64 xdp_txs;
int err;
+ napi = &block->napi;
+ feat = napi->dev->features;
+
+ rx = block->rx;
+ priv = rx->gve;
+ complq = &rx->dqo.complq;
+
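+ /* Snapshot the XDP counters; compared after the poll loop to decide
+  * whether an XDP TX doorbell write or xdp_do_flush() is needed.
+  */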
+ xdp_redirects = rx->xdp_actions[XDP_REDIRECT];
+ xdp_txs = rx->xdp_actions[XDP_TX];
+
while (work_done < budget) {
struct gve_rx_compl_desc_dqo *compl_desc =
&complq->desc_ring[complq->head];
rx->ctx.skb_tail = NULL;
}
+ if (xdp_txs != rx->xdp_actions[XDP_TX])
+ gve_xdp_tx_flush_dqo(priv, rx->q_num);
+
+ if (xdp_redirects != rx->xdp_actions[XDP_REDIRECT])
+ xdp_do_flush();
+
gve_rx_post_buffers_dqo(rx);
u64_stats_update_begin(&rx->statss);
#include "gve_utils.h"
#include "gve_dqo.h"
#include <net/ip.h>
+#include <linux/bpf.h>
#include <linux/tcp.h>
#include <linux/slab.h>
#include <linux/skbuff.h>
return false;
}
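+/* Ring the doorbell of the XDP TX queue paired with queue xdp_qid so that
+ * posted XDP descriptors are handed to the device.
+ */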
+void gve_xdp_tx_flush_dqo(struct gve_priv *priv, u32 xdp_qid)
+{
+ u32 tx_qid = gve_xdp_tx_queue_id(priv, xdp_qid);
+ struct gve_tx_ring *tx = &priv->tx[tx_qid];
+
+ gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail);
+}
+
static struct gve_tx_pending_packet_dqo *
gve_alloc_pending_packet(struct gve_tx_ring *tx)
{
gve_remove_napi(priv, ntfy_idx);
gve_clean_tx_done_dqo(priv, tx, /*napi=*/NULL);
- netdev_tx_reset_queue(tx->netdev_txq);
+ if (tx->netdev_txq)
+ netdev_tx_reset_queue(tx->netdev_txq);
gve_tx_clean_pending_packets(tx);
gve_tx_remove_from_block(priv, idx);
}
gve_tx_add_to_block(priv, idx);
- tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
+ if (idx < priv->tx_cfg.num_queues)
+ tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}
memset(tx, 0, sizeof(*tx));
tx->q_num = idx;
tx->dev = hdev;
+ spin_lock_init(&tx->dqo_tx.xdp_lock);
atomic_set_release(&tx->dqo_compl.hw_tx_head, 0);
/* Queue sizes must be a power of 2 */
return -ENOMEM;
pkt->skb = skb;
+ pkt->type = GVE_TX_PENDING_PACKET_DQO_SKB;
completion_tag = pkt - tx->dqo.pending_packets;
gve_extract_tx_metadata_dqo(skb, &metadata);
}
}
tx->dqo_tx.completed_packet_desc_cnt += pending_packet->num_bufs;
- if (tx->dqo.qpl)
- gve_free_tx_qpl_bufs(tx, pending_packet);
- else
+
+ switch (pending_packet->type) {
+ case GVE_TX_PENDING_PACKET_DQO_SKB:
+ if (tx->dqo.qpl)
+ gve_free_tx_qpl_bufs(tx, pending_packet);
+ else
+ gve_unmap_packet(tx->dev, pending_packet);
+ (*pkts)++;
+ *bytes += pending_packet->skb->len;
+
+ napi_consume_skb(pending_packet->skb, is_napi);
+ pending_packet->skb = NULL;
+ gve_free_pending_packet(tx, pending_packet);
+ break;
+ case GVE_TX_PENDING_PACKET_DQO_XDP_FRAME:
gve_unmap_packet(tx->dev, pending_packet);
- *bytes += pending_packet->skb->len;
- (*pkts)++;
- napi_consume_skb(pending_packet->skb, is_napi);
- pending_packet->skb = NULL;
- gve_free_pending_packet(tx, pending_packet);
+ (*pkts)++;
+ *bytes += pending_packet->xdpf->len;
+ xdp_return_frame(pending_packet->xdpf);
+ pending_packet->xdpf = NULL;
+ gve_free_pending_packet(tx, pending_packet);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
}
static void gve_handle_miss_completion(struct gve_priv *priv,
num_descs_cleaned++;
}
- netdev_tx_completed_queue(tx->netdev_txq,
- pkt_compl_pkts + miss_compl_pkts,
- pkt_compl_bytes + miss_compl_bytes);
+ if (tx->netdev_txq)
+ netdev_tx_completed_queue(tx->netdev_txq,
+ pkt_compl_pkts + miss_compl_pkts,
+ pkt_compl_bytes + miss_compl_bytes);
remove_miss_completions(priv, tx);
remove_timed_out_completions(priv, tx);
compl_desc = &tx->dqo.compl_ring[tx->dqo_compl.head];
return compl_desc->generation != tx->dqo_compl.cur_gen_bit;
}
+
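+/* NAPI poll for XDP TX queues: clean completed descriptors and report
+ * whether further completions are outstanding.
+ */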
+bool gve_xdp_poll_dqo(struct gve_notify_block *block)
+{
+ struct gve_tx_compl_desc *compl_desc;
+ struct gve_tx_ring *tx = block->tx;
+ struct gve_priv *priv = block->priv;
+
+ gve_clean_tx_done_dqo(priv, tx, &block->napi);
+
+ /* Return true if we still have work. */
+ compl_desc = &tx->dqo.compl_ring[tx->dqo_compl.head];
+ return compl_desc->generation != tx->dqo_compl.cur_gen_bit;
+}
+
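+/* Map and post a single XDP frame as one packet descriptor. Called with the
+ * ring's dqo_tx.xdp_lock held; the caller is responsible for the doorbell.
+ */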
+int gve_xdp_xmit_one_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
+ struct xdp_frame *xdpf)
+{
+ struct gve_tx_pending_packet_dqo *pkt;
+ u32 desc_idx = tx->dqo_tx.tail;
+ s16 completion_tag;
+ int num_descs = 1;
+ dma_addr_t addr;
+ int err;
+
+ if (unlikely(!gve_has_tx_slots_available(tx, num_descs)))
+ return -EBUSY;
+
+ pkt = gve_alloc_pending_packet(tx);
+ if (unlikely(!pkt))
+ return -EBUSY;
+
+ pkt->type = GVE_TX_PENDING_PACKET_DQO_XDP_FRAME;
+ pkt->num_bufs = 0;
+ pkt->xdpf = xdpf;
+ completion_tag = pkt - tx->dqo.pending_packets;
+
+ /* Generate Packet Descriptor */
+ addr = dma_map_single(tx->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
+ err = dma_mapping_error(tx->dev, addr);
+ if (unlikely(err))
+ goto err;
+
+ dma_unmap_len_set(pkt, len[pkt->num_bufs], xdpf->len);
+ dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr);
+ pkt->num_bufs++;
+
+ gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, false, xdpf->len, addr,
+ completion_tag, true, false);
+
+ gve_tx_update_tail(tx, desc_idx);
+ return 0;
+
+err:
+ pkt->xdpf = NULL;
+ pkt->num_bufs = 0;
+ gve_free_pending_packet(tx, pkt);
+ return err;
+}
+
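+/* ndo_xdp_xmit handler for DQO RDA: pick an XDP TX queue based on the CPU,
+ * post up to @n frames under the ring lock, and ring the doorbell when
+ * XDP_XMIT_FLUSH is set.
+ */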
+int gve_xdp_xmit_dqo(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_tx_ring *tx;
+ int i, err = 0, qid;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ qid = gve_xdp_tx_queue_id(priv,
+ smp_processor_id() % priv->tx_cfg.num_xdp_queues);
+
+ tx = &priv->tx[qid];
+
+ spin_lock(&tx->dqo_tx.xdp_lock);
+ for (i = 0; i < n; i++) {
+ err = gve_xdp_xmit_one_dqo(priv, tx, frames[i]);
+ if (err)
+ break;
+ }
+
+ if (flags & XDP_XMIT_FLUSH)
+ gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail);
+
+ spin_unlock(&tx->dqo_tx.xdp_lock);
+
+ u64_stats_update_begin(&tx->statss);
+ tx->xdp_xmit += n;
+ tx->xdp_xmit_errors += n - i;
+ u64_stats_update_end(&tx->statss);
+
+ return i ? i : err;
+}