net/mlx5e: Reuse per-RQ XDP buffer to avoid stack zeroing overhead
author     Carolina Jubran <cjubran@nvidia.com>
           Wed, 14 May 2025 20:03:52 +0000 (23:03 +0300)
committer  Jakub Kicinski <kuba@kernel.org>
           Fri, 16 May 2025 22:46:07 +0000 (15:46 -0700)
CONFIG_INIT_STACK_ALL_ZERO introduces a performance cost by
zero-initializing all stack variables on function entry. The mlx5 XDP
RX path previously allocated a struct mlx5e_xdp_buff on the stack per
received CQE, resulting in measurable performance degradation under
this config.
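
For illustration only (stand-in types, not the actual driver code), the
previous pattern declared the buffer as a function local in the per-packet
path, so the zero-init instrumentation (-ftrivial-auto-var-init=zero)
cleared it on every call:

    /* simplified sketch of the old per-CQE pattern */
    struct example_xdp_buff {
            unsigned char xdp[64];  /* stands in for struct xdp_buff */
            void *cqe;              /* stands in for struct mlx5_cqe64 * */
            void *rq;               /* stands in for struct mlx5e_rq * */
    };

    static void handle_one_cqe(void)
    {
            /* with CONFIG_INIT_STACK_ALL_ZERO this local is zero-filled
             * on entry, i.e. once per received packet
             */
            struct example_xdp_buff mxbuf;

            /* ... fill mxbuf and run the XDP program ... */
            (void)mxbuf;
    }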

This patch reuses an mlx5e_xdp_buff stored in the mlx5e_rq struct,
avoiding per-CQE stack allocations and repeated zeroing.
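
As a minimal sketch of the new layout (same stand-in types as above, not
the actual driver structures), the buffer now lives in the long-lived RQ
object and is overwritten per CQE instead of being re-declared on the
stack:

    struct example_rq {
            /* ... other per-RQ state ... */
            struct example_xdp_buff mxbuf;  /* reused for every CQE */
    };

    static void handle_one_cqe_reused(struct example_rq *rq)
    {
            /* no per-call stack object, hence nothing to zero */
            struct example_xdp_buff *mxbuf = &rq->mxbuf;

            /* ... overwrite only the fields needed for this packet ... */
            (void)mxbuf;
    }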

With this change, XDP_DROP and XDP_TX performance matches that of
kernels built without CONFIG_INIT_STACK_ALL_ZERO.

Performance was measured on a ConnectX-6Dx using a single RX channel
(1 CPU at 100% usage) at ~50 Mpps. The baseline results were taken from
net-next-6.15.

Stack zeroing disabled:
- XDP_DROP:
    * baseline:                     31.47 Mpps
    * baseline + per-RQ allocation: 32.31 Mpps (+2.68%)

- XDP_TX:
    * baseline:                     12.41 Mpps
    * baseline + per-RQ allocation: 12.95 Mpps (+4.30%)

Stack zeroing enabled:
- XDP_DROP:
    * baseline:                     24.32 Mpps
    * baseline + per-RQ allocation: 32.27 Mpps (+32.7%)

- XDP_TX:
    * baseline:                     11.80 Mpps
    * baseline + per-RQ allocation: 12.24 Mpps (+3.72%)
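
(The relative gains above are computed as new / baseline - 1; e.g. with
stack zeroing enabled, XDP_DROP goes from 24.32 to 32.27 Mpps,
i.e. 32.27 / 24.32 - 1 ≈ +32.7%.)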

Reported-by: Sebastiano Miano <mianosebastiano@gmail.com>
Reported-by: Samuel Dobron <sdobron@redhat.com>
Link: https://lore.kernel.org/all/CAMENy5pb8ea+piKLg5q5yRTMZacQqYWAoVLE1FE9WhQPq92E0g@mail.gmail.com/
Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Link: https://patch.msgid.link/1747253032-663457-1-git-send-email-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 32ed4963b8adaa8111f1b8a83735773b1a6da19b..5b0d03b3efe8284f0594d9c622f240b0a803e8dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -520,6 +520,12 @@ struct mlx5e_xdpsq {
        struct mlx5e_channel      *channel;
 } ____cacheline_aligned_in_smp;
 
+struct mlx5e_xdp_buff {
+       struct xdp_buff xdp;
+       struct mlx5_cqe64 *cqe;
+       struct mlx5e_rq *rq;
+};
+
 struct mlx5e_ktls_resync_resp;
 
 struct mlx5e_icosq {
@@ -716,6 +722,7 @@ struct mlx5e_rq {
        struct mlx5e_xdpsq    *xdpsq;
        DECLARE_BITMAP(flags, 8);
        struct page_pool      *page_pool;
+       struct mlx5e_xdp_buff mxbuf;
 
        /* AF_XDP zero-copy */
        struct xsk_buff_pool  *xsk_pool;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 446e492c6bb8e37ca45f608c8be6812d2eb9eeb0..46ab0a9e8cddbb1419c07297add0d7d7d7247fe5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
        (MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \
         sizeof(struct mlx5_wqe_inline_seg))
 
-struct mlx5e_xdp_buff {
-       struct xdp_buff xdp;
-       struct mlx5_cqe64 *cqe;
-       struct mlx5e_rq *rq;
-};
-
 /* XDP packets can be transmitted in different ways. On completion, we need to
  * distinguish between them to clean up things in a proper way.
  */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 5fd70b4d55beb4ed277a5ea896a6859350b72d21..84b1ab8233b8107f0d954ea29c33601b279a2c27 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1684,17 +1684,17 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
 
        prog = rcu_dereference(rq->xdp_prog);
        if (prog) {
-               struct mlx5e_xdp_buff mxbuf;
+               struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf;
 
                net_prefetchw(va); /* xdp_frame data area */
                mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
-                                cqe_bcnt, &mxbuf);
-               if (mlx5e_xdp_handle(rq, prog, &mxbuf))
+                                cqe_bcnt, mxbuf);
+               if (mlx5e_xdp_handle(rq, prog, mxbuf))
                        return NULL; /* page/packet was consumed by XDP */
 
-               rx_headroom = mxbuf.xdp.data - mxbuf.xdp.data_hard_start;
-               metasize = mxbuf.xdp.data - mxbuf.xdp.data_meta;
-               cqe_bcnt = mxbuf.xdp.data_end - mxbuf.xdp.data;
+               rx_headroom = mxbuf->xdp.data - mxbuf->xdp.data_hard_start;
+               metasize = mxbuf->xdp.data - mxbuf->xdp.data_meta;
+               cqe_bcnt = mxbuf->xdp.data_end - mxbuf->xdp.data;
        }
        frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
        skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
@@ -1713,11 +1713,11 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
                             struct mlx5_cqe64 *cqe, u32 cqe_bcnt)
 {
        struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
+       struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf;
        struct mlx5e_wqe_frag_info *head_wi = wi;
        u16 rx_headroom = rq->buff.headroom;
        struct mlx5e_frag_page *frag_page;
        struct skb_shared_info *sinfo;
-       struct mlx5e_xdp_buff mxbuf;
        u32 frag_consumed_bytes;
        struct bpf_prog *prog;
        struct sk_buff *skb;
@@ -1737,8 +1737,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
        net_prefetch(va + rx_headroom);
 
        mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
-                        frag_consumed_bytes, &mxbuf);
-       sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp);
+                        frag_consumed_bytes, mxbuf);
+       sinfo = xdp_get_shared_info_from_buff(&mxbuf->xdp);
        truesize = 0;
 
        cqe_bcnt -= frag_consumed_bytes;
@@ -1750,8 +1750,9 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 
                frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
 
-               mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page,
-                                              wi->offset, frag_consumed_bytes);
+               mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf->xdp,
+                                              frag_page, wi->offset,
+                                              frag_consumed_bytes);
                truesize += frag_info->frag_stride;
 
                cqe_bcnt -= frag_consumed_bytes;
@@ -1760,7 +1761,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
        }
 
        prog = rcu_dereference(rq->xdp_prog);
-       if (prog && mlx5e_xdp_handle(rq, prog, &mxbuf)) {
+       if (prog && mlx5e_xdp_handle(rq, prog, mxbuf)) {
                if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
                        struct mlx5e_wqe_frag_info *pwi;
 
@@ -1770,21 +1771,23 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
                return NULL; /* page/packet was consumed by XDP */
        }
 
-       skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start, rq->buff.frame0_sz,
-                                    mxbuf.xdp.data - mxbuf.xdp.data_hard_start,
-                                    mxbuf.xdp.data_end - mxbuf.xdp.data,
-                                    mxbuf.xdp.data - mxbuf.xdp.data_meta);
+       skb = mlx5e_build_linear_skb(
+               rq, mxbuf->xdp.data_hard_start, rq->buff.frame0_sz,
+               mxbuf->xdp.data - mxbuf->xdp.data_hard_start,
+               mxbuf->xdp.data_end - mxbuf->xdp.data,
+               mxbuf->xdp.data - mxbuf->xdp.data_meta);
        if (unlikely(!skb))
                return NULL;
 
        skb_mark_for_recycle(skb);
        head_wi->frag_page->frags++;
 
-       if (xdp_buff_has_frags(&mxbuf.xdp)) {
+       if (xdp_buff_has_frags(&mxbuf->xdp)) {
                /* sinfo->nr_frags is reset by build_skb, calculate again. */
                xdp_update_skb_shared_info(skb, wi - head_wi - 1,
                                           sinfo->xdp_frags_size, truesize,
-                                          xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
+                                          xdp_buff_is_frag_pfmemalloc(
+                                               &mxbuf->xdp));
 
                for (struct mlx5e_wqe_frag_info *pwi = head_wi + 1; pwi < wi; pwi++)
                        pwi->frag_page->frags++;
@@ -1984,10 +1987,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
        struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx];
        u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
        struct mlx5e_frag_page *head_page = frag_page;
+       struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf;
        u32 frag_offset    = head_offset;
        u32 byte_cnt       = cqe_bcnt;
        struct skb_shared_info *sinfo;
-       struct mlx5e_xdp_buff mxbuf;
        unsigned int truesize = 0;
        struct bpf_prog *prog;
        struct sk_buff *skb;
@@ -2033,9 +2036,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
                }
        }
 
-       mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz, linear_data_len, &mxbuf);
+       mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz,
+                        linear_data_len, mxbuf);
 
-       sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp);
+       sinfo = xdp_get_shared_info_from_buff(&mxbuf->xdp);
 
        while (byte_cnt) {
                /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
@@ -2046,7 +2050,8 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
                else
                        truesize += ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
 
-               mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, frag_offset,
+               mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf->xdp,
+                                              frag_page, frag_offset,
                                               pg_consumed_bytes);
                byte_cnt -= pg_consumed_bytes;
                frag_offset = 0;
@@ -2054,7 +2059,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
        }
 
        if (prog) {
-               if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
+               if (mlx5e_xdp_handle(rq, prog, mxbuf)) {
                        if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
                                struct mlx5e_frag_page *pfp;
 
@@ -2067,10 +2072,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
                        return NULL; /* page/packet was consumed by XDP */
                }
 
-               skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start,
-                                            linear_frame_sz,
-                                            mxbuf.xdp.data - mxbuf.xdp.data_hard_start, 0,
-                                            mxbuf.xdp.data - mxbuf.xdp.data_meta);
+               skb = mlx5e_build_linear_skb(
+                       rq, mxbuf->xdp.data_hard_start, linear_frame_sz,
+                       mxbuf->xdp.data - mxbuf->xdp.data_hard_start, 0,
+                       mxbuf->xdp.data - mxbuf->xdp.data_meta);
                if (unlikely(!skb)) {
                        mlx5e_page_release_fragmented(rq, &wi->linear_page);
                        return NULL;
@@ -2080,13 +2085,14 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
                wi->linear_page.frags++;
                mlx5e_page_release_fragmented(rq, &wi->linear_page);
 
-               if (xdp_buff_has_frags(&mxbuf.xdp)) {
+               if (xdp_buff_has_frags(&mxbuf->xdp)) {
                        struct mlx5e_frag_page *pagep;
 
                        /* sinfo->nr_frags is reset by build_skb, calculate again. */
                        xdp_update_skb_shared_info(skb, frag_page - head_page,
                                                   sinfo->xdp_frags_size, truesize,
-                                                  xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
+                                                  xdp_buff_is_frag_pfmemalloc(
+                                                       &mxbuf->xdp));
 
                        pagep = head_page;
                        do
@@ -2097,12 +2103,13 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
        } else {
                dma_addr_t addr;
 
-               if (xdp_buff_has_frags(&mxbuf.xdp)) {
+               if (xdp_buff_has_frags(&mxbuf->xdp)) {
                        struct mlx5e_frag_page *pagep;
 
                        xdp_update_skb_shared_info(skb, sinfo->nr_frags,
                                                   sinfo->xdp_frags_size, truesize,
-                                                  xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
+                                                  xdp_buff_is_frag_pfmemalloc(
+                                                       &mxbuf->xdp));
 
                        pagep = frag_page - sinfo->nr_frags;
                        do
@@ -2152,20 +2159,20 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 
        prog = rcu_dereference(rq->xdp_prog);
        if (prog) {
-               struct mlx5e_xdp_buff mxbuf;
+               struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf;
 
                net_prefetchw(va); /* xdp_frame data area */
                mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
-                                cqe_bcnt, &mxbuf);
-               if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
+                                cqe_bcnt, mxbuf);
+               if (mlx5e_xdp_handle(rq, prog, mxbuf)) {
                        if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
                                frag_page->frags++;
                        return NULL; /* page/packet was consumed by XDP */
                }
 
-               rx_headroom = mxbuf.xdp.data - mxbuf.xdp.data_hard_start;
-               metasize = mxbuf.xdp.data - mxbuf.xdp.data_meta;
-               cqe_bcnt = mxbuf.xdp.data_end - mxbuf.xdp.data;
+               rx_headroom = mxbuf->xdp.data - mxbuf->xdp.data_hard_start;
+               metasize = mxbuf->xdp.data - mxbuf->xdp.data_meta;
+               cqe_bcnt = mxbuf->xdp.data_end - mxbuf->xdp.data;
        }
        frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
        skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);