From: Saeed Mahameed Date: Mon, 16 Jun 2025 14:14:37 +0000 (+0300) Subject: net/mlx5e: Convert over to netmem X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d1668f119943aec7c10244e5481964fad24b7ba2;p=thirdparty%2Flinux.git net/mlx5e: Convert over to netmem mlx5e_page_frag holds the physical page itself, to naturally support zc page pools, remove physical page reference from mlx5 and replace it with netmem_ref, to avoid internal handling in mlx5 for net_iov backed pages. SHAMPO can issue packets that are not split into header and data. These packets will be dropped if the data part resides in a net_iov as the driver can't read into this area. No performance degradation observed. Signed-off-by: Saeed Mahameed Signed-off-by: Dragos Tatulea Reviewed-by: Dragos Tatulea Reviewed-by: Tariq Toukan Reviewed-by: Mina Almasry Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250616141441.1243044-9-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index c329de1d4f0ad..65a73913b9a24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -553,7 +553,7 @@ struct mlx5e_icosq { } ____cacheline_aligned_in_smp; struct mlx5e_frag_page { - struct page *page; + netmem_ref netmem; u16 frags; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index e34ef53ebd0e0..2bb32082bfccd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -273,33 +273,32 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, #define MLX5E_PAGECNT_BIAS_MAX (PAGE_SIZE / 64) -static int mlx5e_page_alloc_fragmented(struct page_pool *pool, +static int mlx5e_page_alloc_fragmented(struct page_pool *pp, struct mlx5e_frag_page *frag_page) { - struct page *page; + netmem_ref netmem = page_pool_dev_alloc_netmems(pp); - page = page_pool_dev_alloc_pages(pool); - if (unlikely(!page)) + if (unlikely(!netmem)) return -ENOMEM; - page_pool_fragment_page(page, MLX5E_PAGECNT_BIAS_MAX); + page_pool_fragment_netmem(netmem, MLX5E_PAGECNT_BIAS_MAX); *frag_page = (struct mlx5e_frag_page) { - .page = page, + .netmem = netmem, .frags = 0, }; return 0; } -static void mlx5e_page_release_fragmented(struct page_pool *pool, +static void mlx5e_page_release_fragmented(struct page_pool *pp, struct mlx5e_frag_page *frag_page) { u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - frag_page->frags; - struct page *page = frag_page->page; + netmem_ref netmem = frag_page->netmem; - if (page_pool_unref_page(page, drain_count) == 0) - page_pool_put_unrefed_page(pool, page, -1, true); + if (page_pool_unref_netmem(netmem, drain_count) == 0) + page_pool_put_unrefed_netmem(pp, netmem, -1, true); } static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, @@ -359,7 +358,7 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); headroom = i == 0 ? rq->buff.headroom : 0; - addr = page_pool_get_dma_addr(frag->frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag->frag_page->netmem); wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom); } @@ -500,9 +499,10 @@ mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinf struct xdp_buff *xdp, struct mlx5e_frag_page *frag_page, u32 frag_offset, u32 len) { + netmem_ref netmem = frag_page->netmem; skb_frag_t *frag; - dma_addr_t addr = page_pool_get_dma_addr(frag_page->page); + dma_addr_t addr = page_pool_get_dma_addr_netmem(netmem); dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir); if (!xdp_buff_has_frags(xdp)) { @@ -515,9 +515,9 @@ mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinf } frag = &sinfo->frags[sinfo->nr_frags++]; - skb_frag_fill_page_desc(frag, frag_page->page, frag_offset, len); + skb_frag_fill_netmem_desc(frag, netmem, frag_offset, len); - if (page_is_pfmemalloc(frag_page->page)) + if (netmem_is_pfmemalloc(netmem)) xdp_buff_set_frag_pfmemalloc(xdp); sinfo->xdp_frags_size += len; } @@ -528,27 +528,29 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, u32 frag_offset, u32 len, unsigned int truesize) { - dma_addr_t addr = page_pool_get_dma_addr(frag_page->page); + dma_addr_t addr = page_pool_get_dma_addr_netmem(frag_page->netmem); u8 next_frag = skb_shinfo(skb)->nr_frags; + netmem_ref netmem = frag_page->netmem; dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir); - if (skb_can_coalesce(skb, next_frag, frag_page->page, frag_offset)) { + if (skb_can_coalesce_netmem(skb, next_frag, netmem, frag_offset)) { skb_coalesce_rx_frag(skb, next_frag - 1, len, truesize); - } else { - frag_page->frags++; - skb_add_rx_frag(skb, next_frag, frag_page->page, - frag_offset, len, truesize); + return; } + + frag_page->frags++; + skb_add_rx_frag_netmem(skb, next_frag, netmem, + frag_offset, len, truesize); } static inline void mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb, - struct page *page, dma_addr_t addr, + netmem_ref netmem, dma_addr_t addr, int offset_from, int dma_offset, u32 headlen) { - const void *from = page_address(page) + offset_from; + const void *from = netmem_address(netmem) + offset_from; /* Aligning len to sizeof(long) optimizes memcpy performance */ unsigned int len = ALIGN(headlen, sizeof(long)); @@ -685,7 +687,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, if (unlikely(err)) goto err_unmap; - addr = page_pool_get_dma_addr(frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); for (int j = 0; j < MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; j++) { header_offset = mlx5e_shampo_hd_offset(index++); @@ -796,7 +798,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) err = mlx5e_page_alloc_fragmented(rq->page_pool, frag_page); if (unlikely(err)) goto err_unmap; - addr = page_pool_get_dma_addr(frag_page->page); + + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { .ptag = cpu_to_be64(addr | MLX5_EN_WR), }; @@ -1216,7 +1219,7 @@ static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); u16 head_offset = mlx5e_shampo_hd_offset(header_index) + rq->buff.headroom; - return page_address(frag_page->page) + head_offset; + return netmem_address(frag_page->netmem) + head_offset; } static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) @@ -1677,11 +1680,11 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, dma_addr_t addr; u32 frag_size; - va = page_address(frag_page->page) + wi->offset; + va = netmem_address(frag_page->netmem) + wi->offset; data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - addr = page_pool_get_dma_addr(frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, frag_size, rq->buff.map_dir); net_prefetch(data); @@ -1731,10 +1734,10 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi frag_page = wi->frag_page; - va = page_address(frag_page->page) + wi->offset; + va = netmem_address(frag_page->netmem) + wi->offset; frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); - addr = page_pool_get_dma_addr(frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, rq->buff.frame0_sz, rq->buff.map_dir); net_prefetchw(va); /* xdp_frame data area */ @@ -2007,13 +2010,14 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w if (prog) { /* area for bpf_xdp_[store|load]_bytes */ - net_prefetchw(page_address(frag_page->page) + frag_offset); + net_prefetchw(netmem_address(frag_page->netmem) + frag_offset); if (unlikely(mlx5e_page_alloc_fragmented(rq->page_pool, &wi->linear_page))) { rq->stats->buff_alloc_err++; return NULL; } - va = page_address(wi->linear_page.page); + + va = netmem_address(wi->linear_page.netmem); net_prefetchw(va); /* xdp_frame data area */ linear_hr = XDP_PACKET_HEADROOM; linear_data_len = 0; @@ -2124,8 +2128,8 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w while (++pagep < frag_page); } /* copy header */ - addr = page_pool_get_dma_addr(head_page->page); - mlx5e_copy_skb_header(rq, skb, head_page->page, addr, + addr = page_pool_get_dma_addr_netmem(head_page->netmem); + mlx5e_copy_skb_header(rq, skb, head_page->netmem, addr, head_offset, head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; @@ -2155,11 +2159,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, return NULL; } - va = page_address(frag_page->page) + head_offset; + va = netmem_address(frag_page->netmem) + head_offset; data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - addr = page_pool_get_dma_addr(frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset, frag_size, rq->buff.map_dir); net_prefetch(data); @@ -2198,16 +2202,19 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct mlx5_cqe64 *cqe, u16 header_index) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); - dma_addr_t page_dma_addr = page_pool_get_dma_addr(frag_page->page); u16 head_offset = mlx5e_shampo_hd_offset(header_index); - dma_addr_t dma_addr = page_dma_addr + head_offset; u16 head_size = cqe->shampo.header_size; u16 rx_headroom = rq->buff.headroom; struct sk_buff *skb = NULL; + dma_addr_t page_dma_addr; + dma_addr_t dma_addr; void *hdr, *data; u32 frag_size; - hdr = page_address(frag_page->page) + head_offset; + page_dma_addr = page_pool_get_dma_addr_netmem(frag_page->netmem); + dma_addr = page_dma_addr + head_offset; + + hdr = netmem_address(frag_page->netmem) + head_offset; data = hdr + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size); @@ -2232,7 +2239,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, } net_prefetchw(skb->data); - mlx5e_copy_skb_header(rq, skb, frag_page->page, dma_addr, + mlx5e_copy_skb_header(rq, skb, frag_page->netmem, dma_addr, head_offset + rx_headroom, rx_headroom, head_size); /* skb linear part was allocated with headlen and aligned to long */ @@ -2326,11 +2333,23 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq } if (!*skb) { - if (likely(head_size)) + if (likely(head_size)) { *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index); - else - *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe, cqe_bcnt, - data_offset, page_idx); + } else { + struct mlx5e_frag_page *frag_page; + + frag_page = &wi->alloc_units.frag_pages[page_idx]; + /* Drop packets with header in unreadable data area to + * prevent the kernel from touching it. + */ + if (unlikely(netmem_is_net_iov(frag_page->netmem))) + goto free_hd_entry; + *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe, + cqe_bcnt, + data_offset, + page_idx); + } + if (unlikely(!*skb)) goto free_hd_entry;