/*
 * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
#include "en/xdp.h"
#include "en/params.h"
#include <linux/bitfield.h>
#include <net/page_pool/helpers.h>
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
{
	int hr = mlx5e_get_linear_rq_headroom(params, xsk);

	/* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
	 * The condition checked in mlx5e_rx_is_linear_skb is:
	 *   SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE         (1)
	 *   (Note that hw_mtu == sw_mtu + hard_mtu.)
	 * What is returned from this function is:
	 *   max_mtu = PAGE_SIZE - S - hr - hard_mtu                         (2)
	 * After assigning sw_mtu := max_mtu, the left side of (1) turns to
	 * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
	 * because both PAGE_SIZE and S are already aligned. Any number greater
	 * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
	 * so max_mtu is the maximum MTU allowed.
	 */

	return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
}
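/* Worked example (illustrative values, not from the original source): with
 * PAGE_SIZE == 4096, hr == 256 (a typical XDP headroom) and S == 320,
 * SKB_MAX_HEAD(hr) == PAGE_SIZE - hr - S == 3520, and MLX5E_HW2SW_MTU() then
 * subtracts the hard (L2) MTU from that value, matching formula (2) above.
 */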
static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
		    struct xdp_buff *xdp)
{
	struct page *page = virt_to_page(xdp->data);
	struct mlx5e_xmit_data_frags xdptxdf = {};
	struct mlx5e_xmit_data *xdptxd;
	struct xdp_frame *xdpf;
	dma_addr_t dma_addr;
	int i;

	xdpf = xdp_convert_buff_to_frame(xdp);
	if (unlikely(!xdpf))
		return false;

	xdptxd = &xdptxdf.xd;
	xdptxd->data = xdpf->data;
	xdptxd->len  = xdpf->len;
	xdptxd->has_frags = xdp_frame_has_frags(xdpf);

	if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
		/* The xdp_buff was in the UMEM and was copied into a newly
		 * allocated page. The UMEM page was returned via the ZCA, and
		 * this new page has to be mapped at this point and has to be
		 * unmapped and returned via xdp_return_frame on completion.
		 */

		/* Prevent double recycling of the UMEM page. Even in case this
		 * function returns false, the xdp_buff shouldn't be recycled,
		 * as it was already done in xdp_convert_zc_to_xdp_frame.
		 */
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */

		if (unlikely(xdptxd->has_frags))
			return false;

		dma_addr = dma_map_single(sq->pdev, xdptxd->data, xdptxd->len,
					  DMA_TO_DEVICE);
		if (dma_mapping_error(sq->pdev, dma_addr)) {
			xdp_return_frame(xdpf);
			return false;
		}

		xdptxd->dma_addr = dma_addr;

		if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
					      mlx5e_xmit_xdp_frame, sq, xdptxd, 0)))
			return false;

		/* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME });
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .frame.xdpf = xdpf });
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .frame.dma_addr = dma_addr });
		return true;
	}

	/* Driver assumes that xdp_convert_buff_to_frame returns an xdp_frame
	 * that points to the same memory region as the original xdp_buff. It
	 * allows to map the memory only once and to use the DMA_BIDIRECTIONAL
	 * mode.
	 */

	dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf);
	dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd->len, DMA_BIDIRECTIONAL);

	if (xdptxd->has_frags) {
		xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf);
		xdptxdf.dma_arr = NULL;

		for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) {
			skb_frag_t *frag = &xdptxdf.sinfo->frags[i];
			dma_addr_t addr;
			u32 len;

			addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
				skb_frag_off(frag);
			len = skb_frag_size(frag);
			dma_sync_single_for_device(sq->pdev, addr, len,
						   DMA_BIDIRECTIONAL);
		}
	}

	xdptxd->dma_addr = dma_addr;

	if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
				      mlx5e_xmit_xdp_frame, sq, xdptxd, 0)))
		return false;

	/* xmit_mode == MLX5E_XDP_XMIT_MODE_PAGE */
	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
			     (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_PAGE });

	if (xdptxd->has_frags) {
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info)
				     { .page.num = 1 + xdptxdf.sinfo->nr_frags });
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .page.page = page });
		for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) {
			skb_frag_t *frag = &xdptxdf.sinfo->frags[i];

			mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
					     (union mlx5e_xdp_info)
					     { .page.page = skb_frag_page(frag) });
		}
	} else {
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .page.num = 1 });
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .page.page = page });
	}

	return true;
}
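/* Note on the two paths above (summary added for clarity; wording is ours,
 * not from the original source): frames copied out of an XSK UMEM are mapped
 * with dma_map_single() here and unmapped on completion (XMIT_MODE_FRAME),
 * while regular RQ buffers come from a page_pool that is already DMA-mapped,
 * so they only need dma_sync_single_for_device() and are recycled back to
 * the pool on completion (XMIT_MODE_PAGE).
 */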
static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
{
	const struct mlx5e_xdp_buff *_ctx = (void *)ctx;

	if (unlikely(!mlx5e_rx_hw_stamp(_ctx->rq->tstamp)))
		return -ENODATA;

	*timestamp = mlx5e_cqe_ts_to_ns(_ctx->rq->ptp_cyc2time,
					_ctx->rq->clock, get_cqe_ts(_ctx->cqe));
	return 0;
}
/* Mapping HW RSS Type bits CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4 into 4-bits */
#define RSS_TYPE_MAX_TABLE	16 /* 4-bits max 16 entries */
#define RSS_L4			GENMASK(1, 0)
#define RSS_L3			GENMASK(3, 2) /* Same as CQE_RSS_HTYPE_IP */

/* Valid combinations of CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4 sorted numerically */
enum mlx5_rss_hash_type {
	RSS_TYPE_NO_HASH	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IP_NONE) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
	RSS_TYPE_L3_IPV4	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
	RSS_TYPE_L4_IPV4_TCP	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)),
	RSS_TYPE_L4_IPV4_UDP	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)),
	RSS_TYPE_L4_IPV4_IPSEC	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)),
	RSS_TYPE_L3_IPV6	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
	RSS_TYPE_L4_IPV6_TCP	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)),
	RSS_TYPE_L4_IPV6_UDP	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)),
	RSS_TYPE_L4_IPV6_IPSEC	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)),
};

/* Invalid combinations will simply return zero, allows no boundary checks */
static const enum xdp_rss_hash_type mlx5_xdp_rss_type[RSS_TYPE_MAX_TABLE] = {
	[RSS_TYPE_NO_HASH]	 = XDP_RSS_TYPE_NONE,
	[1]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[2]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[3]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[RSS_TYPE_L3_IPV4]	 = XDP_RSS_TYPE_L3_IPV4,
	[RSS_TYPE_L4_IPV4_TCP]	 = XDP_RSS_TYPE_L4_IPV4_TCP,
	[RSS_TYPE_L4_IPV4_UDP]	 = XDP_RSS_TYPE_L4_IPV4_UDP,
	[RSS_TYPE_L4_IPV4_IPSEC] = XDP_RSS_TYPE_L4_IPV4_IPSEC,
	[RSS_TYPE_L3_IPV6]	 = XDP_RSS_TYPE_L3_IPV6,
	[RSS_TYPE_L4_IPV6_TCP]	 = XDP_RSS_TYPE_L4_IPV6_TCP,
	[RSS_TYPE_L4_IPV6_UDP]	 = XDP_RSS_TYPE_L4_IPV6_UDP,
	[RSS_TYPE_L4_IPV6_IPSEC] = XDP_RSS_TYPE_L4_IPV6_IPSEC,
	[12]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[13]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[14]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[15]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
};
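/* Example lookup (illustrative, assuming CQE_RSS_IPV4 == 1 and
 * CQE_RSS_L4_TCP == 1 as in the mlx5 CQE definitions): for an IPv4/TCP
 * packet, mlx5e_xdp_rx_hash() below computes ip_type == 0x4 (bits kept in
 * place, since RSS_L3 equals CQE_RSS_HTYPE_IP) and l4_type == 1, so
 * lookup == 0x5 == RSS_TYPE_L4_IPV4_TCP, which this table maps to
 * XDP_RSS_TYPE_L4_IPV4_TCP.
 */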
static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
			     enum xdp_rss_hash_type *rss_type)
{
	const struct mlx5e_xdp_buff *_ctx = (void *)ctx;
	const struct mlx5_cqe64 *cqe = _ctx->cqe;
	u32 hash_type, l4_type, ip_type, lookup;

	if (unlikely(!(_ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)))
		return -ENODATA;

	*hash = be32_to_cpu(cqe->rss_hash_result);

	hash_type = cqe->rss_hash_type;
	BUILD_BUG_ON(CQE_RSS_HTYPE_IP != RSS_L3); /* same mask */
	ip_type = hash_type & CQE_RSS_HTYPE_IP;
	l4_type = FIELD_GET(CQE_RSS_HTYPE_L4, hash_type);
	lookup = ip_type | l4_type;
	*rss_type = mlx5_xdp_rss_type[lookup];

	return 0;
}

const struct xdp_metadata_ops mlx5e_xdp_metadata_ops = {
	.xmo_rx_timestamp	= mlx5e_xdp_rx_timestamp,
	.xmo_rx_hash		= mlx5e_xdp_rx_hash,
};
/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
		      struct bpf_prog *prog, struct mlx5e_xdp_buff *mxbuf)
{
	struct xdp_buff *xdp = &mxbuf->xdp;
	u32 act;
	int err;

	act = bpf_prog_run_xdp(prog, xdp);
	switch (act) {
	case XDP_PASS:
		return false;
	case XDP_TX:
		if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, xdp)))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
		return true;
	case XDP_REDIRECT:
		/* When XDP enabled then page-refcnt==1 here */
		err = xdp_do_redirect(rq->netdev, xdp, prog);
		if (unlikely(err))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
		rq->stats->xdp_redirect++;
		return true;
	default:
		bpf_warn_invalid_xdp_action(rq->netdev, prog, act);
		fallthrough;
	case XDP_ABORTED:
xdp_abort:
		trace_xdp_exception(rq->netdev, prog, act);
		fallthrough;
	case XDP_DROP:
		rq->stats->xdp_drop++;
		return true;
	}
}
static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	u16 pi, contig_wqebbs;

	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
	if (unlikely(contig_wqebbs < size)) {
		struct mlx5e_xdp_wqe_info *wi, *edge_wi;

		wi = &sq->db.wqe_info[pi];
		edge_wi = wi + contig_wqebbs;

		/* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
		for (; wi < edge_wi; wi++) {
			*wi = (struct mlx5e_xdp_wqe_info) {
				.num_wqebbs = 1,
				.num_pkts = 0,
			};
			mlx5e_post_nop(wq, sq->sqn, &sq->pc);
		}
		sq->stats->nops += contig_wqebbs;

		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	}

	return pi;
}
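/* Illustration (ours, with made-up numbers): with a 256-entry SQ, pi == 254
 * and size == 4, mlx5_wq_cyc_get_contig_wqebbs() returns 2, so two NOP WQEBBs
 * are posted to pad out the ring edge and the returned pi wraps to 0, giving
 * the caller 4 physically contiguous WQEBBs.
 */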
static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;
	struct mlx5e_tx_wqe *wqe;
	u16 pi;

	pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
	net_prefetchw(wqe->data);

	*session = (struct mlx5e_tx_mpwqe) {
		.wqe = wqe,
		.bytes_count = 0,
		.ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
		.pkt_count = 0,
		.inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on),
	};

	stats->mpwqe++;
}
void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
	u16 ds_count = session->ds_count;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];

	cseg->opmod_idx_opcode =
		cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);

	wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
	wi->num_pkts = session->pkt_count;

	sq->pc += wi->num_wqebbs;

	sq->doorbell_cseg = cseg;

	session->wqe = NULL; /* Close session */
}
enum {
	MLX5E_XDP_CHECK_OK = 1,
	MLX5E_XDP_CHECK_START_MPWQE = 2,
};

INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
{
	if (unlikely(!sq->mpwqe.wqe)) {
		if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
						     sq->stop_room))) {
			/* SQ is full, ring doorbell */
			mlx5e_xmit_xdp_doorbell(sq);
			sq->stats->full++;
			return -EBUSY;
		}

		return MLX5E_XDP_CHECK_START_MPWQE;
	}

	return MLX5E_XDP_CHECK_OK;
}
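/* Return convention of the check helpers (summary added for clarity): a
 * negative value means the SQ has no room and the doorbell was rung,
 * MLX5E_XDP_CHECK_OK means transmission can proceed, and
 * MLX5E_XDP_CHECK_START_MPWQE (only from the MPWQE variant) means there is
 * room but a new MPWQE session must be opened first.
 */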
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
		     int check_result);

INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
			   int check_result)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;
	struct mlx5e_xmit_data *p = xdptxd;
	struct mlx5e_xmit_data tmp;

	if (xdptxd->has_frags) {
		struct mlx5e_xmit_data_frags *xdptxdf =
			container_of(xdptxd, struct mlx5e_xmit_data_frags, xd);

		if (!!xdptxd->len + xdptxdf->sinfo->nr_frags > 1) {
			/* MPWQE is enabled, but a multi-buffer packet is queued for
			 * transmission. MPWQE can't send fragmented packets, so close
			 * the current session and fall back to a regular WQE.
			 */
			if (unlikely(sq->mpwqe.wqe))
				mlx5e_xdp_mpwqe_complete(sq);
			return mlx5e_xmit_xdp_frame(sq, xdptxd, 0);
		}
		if (!xdptxd->len) {
			skb_frag_t *frag = &xdptxdf->sinfo->frags[0];

			tmp.data = skb_frag_address(frag);
			tmp.len = skb_frag_size(frag);
			tmp.dma_addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[0] :
				page_pool_get_dma_addr(skb_frag_page(frag)) +
				skb_frag_off(frag);
			p = &tmp;
		}
	}

	if (unlikely(p->len > sq->hw_mtu)) {
		stats->err++;
		return false;
	}

	if (!check_result)
		check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq);
	if (unlikely(check_result < 0))
		return false;

	if (check_result == MLX5E_XDP_CHECK_START_MPWQE) {
		/* Start the session when nothing can fail, so it's guaranteed
		 * that if there is an active session, it has at least one dseg,
		 * and it's safe to complete it at any time.
		 */
		mlx5e_xdp_mpwqe_session_start(sq);
	}

	mlx5e_xdp_mpwqe_add_dseg(sq, p, stats);

	if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs)))
		mlx5e_xdp_mpwqe_complete(sq);

	stats->xmit++;
	return true;
}
static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq *sq, int stop_room)
{
	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, stop_room))) {
		/* SQ is full, ring doorbell */
		mlx5e_xmit_xdp_doorbell(sq);
		sq->stats->full++;
		return -EBUSY;
	}

	return MLX5E_XDP_CHECK_OK;
}

INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
{
	return mlx5e_xmit_xdp_frame_check_stop_room(sq, 1);
}
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
		     int check_result)
{
	struct mlx5e_xmit_data_frags *xdptxdf =
		container_of(xdptxd, struct mlx5e_xmit_data_frags, xd);
	struct mlx5_wq_cyc *wq = &sq->wq;
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5_wqe_eth_seg *eseg;
	struct mlx5e_tx_wqe *wqe;

	dma_addr_t dma_addr = xdptxd->dma_addr;
	u32 dma_len = xdptxd->len;
	u16 ds_cnt, inline_hdr_sz;
	u8 num_wqebbs = 1;
	int num_frags = 0;
	bool inline_ok;
	bool linear;
	u16 pi;
	int i;

	struct mlx5e_xdpsq_stats *stats = sq->stats;

	inline_ok = sq->min_inline_mode == MLX5_INLINE_MODE_NONE ||
		dma_len >= MLX5E_XDP_MIN_INLINE;

	if (unlikely(!inline_ok || sq->hw_mtu < dma_len)) {
		stats->err++;
		return false;
	}

	inline_hdr_sz = 0;
	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE)
		inline_hdr_sz = MLX5E_XDP_MIN_INLINE;

	linear = !!(dma_len - inline_hdr_sz);
	ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + linear + !!inline_hdr_sz;

	/* check_result must be 0 if sinfo is passed. */
	if (!check_result) {
		int stop_room = 1;

		if (xdptxd->has_frags) {
			ds_cnt += xdptxdf->sinfo->nr_frags;
			num_frags = xdptxdf->sinfo->nr_frags;
			num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
			/* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
			 * enough to hold all fragments.
			 */
			stop_room = MLX5E_STOP_ROOM(num_wqebbs);
		}

		check_result = mlx5e_xmit_xdp_frame_check_stop_room(sq, stop_room);
	}
	if (unlikely(check_result < 0))
		return false;

	pi = mlx5e_xdpsq_get_next_pi(sq, num_wqebbs);
	wqe = mlx5_wq_cyc_get_wqe(wq, pi);
	net_prefetchw(wqe);

	cseg = &wqe->ctrl;
	eseg = &wqe->eth;
	dseg = wqe->data;

	/* copy the inline part if required */
	if (inline_hdr_sz) {
		memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
		memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
		       inline_hdr_sz - sizeof(eseg->inline_hdr.start));
		dma_len  -= inline_hdr_sz;
		dma_addr += inline_hdr_sz;
		dseg++;
	}

	/* write the dma part */
	if (linear) {
		dseg->addr       = cpu_to_be64(dma_addr);
		dseg->byte_count = cpu_to_be32(dma_len);
		dseg->lkey       = sq->mkey_be;
		dseg++;
	}

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);

	if (test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) {
		memset(&cseg->trailer, 0, sizeof(cseg->trailer));
		memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));

		eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);

		for (i = 0; i < num_frags; i++) {
			skb_frag_t *frag = &xdptxdf->sinfo->frags[i];
			dma_addr_t addr;

			addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] :
				page_pool_get_dma_addr(skb_frag_page(frag)) +
				skb_frag_off(frag);

			dseg->addr = cpu_to_be64(addr);
			dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
			dseg->lkey = sq->mkey_be;
			dseg++;
		}

		cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);

		sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
			.num_wqebbs = num_wqebbs,
			.num_pkts = 1,
		};

		sq->pc += num_wqebbs;
	} else {
		cseg->fm_ce_se = 0;

		sq->pc++;
	}

	sq->doorbell_cseg = cseg;

	stats->xmit++;
	return true;
}
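/* Layout of the xdpi FIFO entries pushed by the TX paths above and popped by
 * mlx5e_free_xdpsq_desc() below (summary added for clarity): every packet
 * starts with a .mode entry. XMIT_MODE_FRAME is followed by the xdp_frame
 * pointer, its DMA address and one DMA address per fragment; XMIT_MODE_PAGE
 * is followed by the number of pages and then that many page pointers;
 * XMIT_MODE_XSK carries no extra entries.
 */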
static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
				  struct mlx5e_xdp_wqe_info *wi,
				  u32 *xsk_frames,
				  struct xdp_frame_bulk *bq)
{
	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
	u16 i;

	for (i = 0; i < wi->num_pkts; i++) {
		union mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);

		switch (xdpi.mode) {
		case MLX5E_XDP_XMIT_MODE_FRAME: {
			/* XDP_TX from the XSK RQ and XDP_REDIRECT */
			struct xdp_frame *xdpf;
			dma_addr_t dma_addr;

			xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
			xdpf = xdpi.frame.xdpf;
			xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
			dma_addr = xdpi.frame.dma_addr;

			dma_unmap_single(sq->pdev, dma_addr,
					 xdpf->len, DMA_TO_DEVICE);
			if (xdp_frame_has_frags(xdpf)) {
				struct skb_shared_info *sinfo;
				int j;

				sinfo = xdp_get_shared_info_from_frame(xdpf);
				for (j = 0; j < sinfo->nr_frags; j++) {
					skb_frag_t *frag = &sinfo->frags[j];

					xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
					dma_addr = xdpi.frame.dma_addr;

					dma_unmap_single(sq->pdev, dma_addr,
							 skb_frag_size(frag), DMA_TO_DEVICE);
				}
			}
			xdp_return_frame_bulk(xdpf, bq);
			break;
		}
		case MLX5E_XDP_XMIT_MODE_PAGE: {
			/* XDP_TX from the regular RQ */
			u8 num, n = 0;

			xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
			num = xdpi.page.num;

			do {
				struct page *page;

				xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
				page = xdpi.page.page;

				/* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE)
				 * as we know this is a page_pool page.
				 */
				page_pool_recycle_direct(page->pp, page);
			} while (++n < num);

			break;
		}
		case MLX5E_XDP_XMIT_MODE_XSK:
			/* AF_XDP send */
			(*xsk_frames)++;
			break;
		default:
			WARN_ON_ONCE(true);
		}
	}
}
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
{
	struct xdp_frame_bulk bq;
	struct mlx5e_xdpsq *sq;
	struct mlx5_cqe64 *cqe;
	u32 xsk_frames = 0;
	u16 sqcc;
	int i;

	xdp_frame_bulk_init(&bq);

	sq = container_of(cq, struct mlx5e_xdpsq, cq);

	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
		return false;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return false;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	i = 0;
	do {
		struct mlx5e_xdp_wqe_info *wi;
		u16 wqe_counter, ci;
		bool last_wqe;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		do {
			last_wqe = (sqcc == wqe_counter);
			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];

			sqcc += wi->num_wqebbs;

			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq);
		} while (!last_wqe);

		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
			netdev_WARN_ONCE(sq->channel->netdev,
					 "Bad OP in XDPSQ CQE: 0x%x\n",
					 get_cqe_opcode(cqe));
			mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
					     (struct mlx5_err_cqe *)cqe);
			mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
			break;
		}
	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

	xdp_flush_frame_bulk(&bq);

	if (xsk_frames)
		xsk_tx_completed(sq->xsk_pool, xsk_frames);

	sq->stats->cqes += i;

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	sq->cc = sqcc;
	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
{
	struct xdp_frame_bulk bq;
	u32 xsk_frames = 0;

	xdp_frame_bulk_init(&bq);

	rcu_read_lock(); /* need for xdp_return_frame_bulk */

	while (sq->cc != sq->pc) {
		struct mlx5e_xdp_wqe_info *wi;
		u16 ci;

		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
		wi = &sq->db.wqe_info[ci];

		sq->cc += wi->num_wqebbs;

		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq);
	}

	xdp_flush_frame_bulk(&bq);
	rcu_read_unlock();

	if (xsk_frames)
		xsk_tx_completed(sq->xsk_pool, xsk_frames);
}
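/* mlx5e_xdp_xmit() below serves as the driver's .ndo_xdp_xmit handler (note
 * added for clarity): it queues up to @n frames arriving via XDP_REDIRECT and
 * returns how many were accepted; frames that were not accepted are released
 * by the caller, and XDP_XMIT_FLUSH asks the driver to ring the doorbell.
 */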
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
		   u32 flags)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_xdpsq *sq;
	int nxmit = 0;
	int sq_num;
	int i;

	/* this flag is sufficient, no need to test internal sq state */
	if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
		return -ENETDOWN;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	sq_num = smp_processor_id();

	if (unlikely(sq_num >= priv->channels.num))
		return -ENXIO;

	sq = &priv->channels.c[sq_num]->xdpsq;

	for (i = 0; i < n; i++) {
		struct mlx5e_xmit_data_frags xdptxdf = {};
		struct xdp_frame *xdpf = frames[i];
		dma_addr_t dma_arr[MAX_SKB_FRAGS];
		struct mlx5e_xmit_data *xdptxd;
		bool ret;

		xdptxd = &xdptxdf.xd;
		xdptxd->data = xdpf->data;
		xdptxd->len = xdpf->len;
		xdptxd->has_frags = xdp_frame_has_frags(xdpf);
		xdptxd->dma_addr = dma_map_single(sq->pdev, xdptxd->data,
						  xdptxd->len, DMA_TO_DEVICE);

		if (unlikely(dma_mapping_error(sq->pdev, xdptxd->dma_addr)))
			break;

		if (xdptxd->has_frags) {
			int j;

			xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf);
			xdptxdf.dma_arr = dma_arr;
			for (j = 0; j < xdptxdf.sinfo->nr_frags; j++) {
				skb_frag_t *frag = &xdptxdf.sinfo->frags[j];

				dma_arr[j] = dma_map_single(sq->pdev, skb_frag_address(frag),
							    skb_frag_size(frag), DMA_TO_DEVICE);

				if (!dma_mapping_error(sq->pdev, dma_arr[j]))
					continue;
				/* mapping error: unmap the fragments mapped so far */
				while (--j >= 0)
					dma_unmap_single(sq->pdev, dma_arr[j],
							 skb_frag_size(&xdptxdf.sinfo->frags[j]),
							 DMA_TO_DEVICE);
				goto out;
			}
		}

		ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
				      mlx5e_xmit_xdp_frame, sq, xdptxd, 0);
		if (unlikely(!ret)) {
			int j;

			dma_unmap_single(sq->pdev, xdptxd->dma_addr,
					 xdptxd->len, DMA_TO_DEVICE);
			if (!xdptxd->has_frags)
				break;
			for (j = 0; j < xdptxdf.sinfo->nr_frags; j++)
				dma_unmap_single(sq->pdev, dma_arr[j],
						 skb_frag_size(&xdptxdf.sinfo->frags[j]),
						 DMA_TO_DEVICE);
			break;
		}

		/* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME });
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .frame.xdpf = xdpf });
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .frame.dma_addr = xdptxd->dma_addr });
		if (xdptxd->has_frags) {
			int j;

			for (j = 0; j < xdptxdf.sinfo->nr_frags; j++)
				mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
						     (union mlx5e_xdp_info)
						     { .frame.dma_addr = dma_arr[j] });
		}
		nxmit++;
	}

out:
	if (flags & XDP_XMIT_FLUSH) {
		if (sq->mpwqe.wqe)
			mlx5e_xdp_mpwqe_complete(sq);
		mlx5e_xmit_xdp_doorbell(sq);
	}

	return nxmit;
}
void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
{
	struct mlx5e_xdpsq *xdpsq = rq->xdpsq;

	if (xdpsq->mpwqe.wqe)
		mlx5e_xdp_mpwqe_complete(xdpsq);

	mlx5e_xmit_xdp_doorbell(xdpsq);

	if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) {
		xdp_do_flush();
		__clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
	}
}
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
	sq->xmit_xdp_frame_check = is_mpw ?
		mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check;
	sq->xmit_xdp_frame = is_mpw ?
		mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}