/*
 * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
33 #include <linux/bpf_trace.h>
34 #include <net/xdp_sock_drv.h>
36 #include "en/params.h"
37 #include <linux/bitfield.h>
39 int mlx5e_xdp_max_mtu(struct mlx5e_params
*params
, struct mlx5e_xsk_param
*xsk
)
41 int hr
= mlx5e_get_linear_rq_headroom(params
, xsk
);
43 /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
44 * The condition checked in mlx5e_rx_is_linear_skb is:
45 * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1)
46 * (Note that hw_mtu == sw_mtu + hard_mtu.)
47 * What is returned from this function is:
48 * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2)
49 * After assigning sw_mtu := max_mtu, the left side of (1) turns to
50 * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
51 * because both PAGE_SIZE and S are already aligned. Any number greater
52 * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
53 * so max_mtu is the maximum MTU allowed.
56 return MLX5E_HW2SW_MTU(params
, SKB_MAX_HEAD(hr
));
60 mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq
*sq
, struct mlx5e_rq
*rq
,
63 struct page
*page
= virt_to_page(xdp
->data
);
64 struct mlx5e_xmit_data_frags xdptxdf
= {};
65 struct mlx5e_xmit_data
*xdptxd
;
66 struct xdp_frame
*xdpf
;
70 xdpf
= xdp_convert_buff_to_frame(xdp
);
75 xdptxd
->data
= xdpf
->data
;
76 xdptxd
->len
= xdpf
->len
;
77 xdptxd
->has_frags
= xdp_frame_has_frags(xdpf
);
79 if (xdp
->rxq
->mem
.type
== MEM_TYPE_XSK_BUFF_POOL
) {
80 /* The xdp_buff was in the UMEM and was copied into a newly
81 * allocated page. The UMEM page was returned via the ZCA, and
82 * this new page has to be mapped at this point and has to be
83 * unmapped and returned via xdp_return_frame on completion.
86 /* Prevent double recycling of the UMEM page. Even in case this
87 * function returns false, the xdp_buff shouldn't be recycled,
88 * as it was already done in xdp_convert_zc_to_xdp_frame.
90 __set_bit(MLX5E_RQ_FLAG_XDP_XMIT
, rq
->flags
); /* non-atomic */
92 if (unlikely(xdptxd
->has_frags
))
95 dma_addr
= dma_map_single(sq
->pdev
, xdptxd
->data
, xdptxd
->len
,
97 if (dma_mapping_error(sq
->pdev
, dma_addr
)) {
98 xdp_return_frame(xdpf
);
102 xdptxd
->dma_addr
= dma_addr
;
104 if (unlikely(!INDIRECT_CALL_2(sq
->xmit_xdp_frame
, mlx5e_xmit_xdp_frame_mpwqe
,
105 mlx5e_xmit_xdp_frame
, sq
, xdptxd
, 0)))
108 /* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */
109 mlx5e_xdpi_fifo_push(&sq
->db
.xdpi_fifo
,
110 (union mlx5e_xdp_info
) { .mode
= MLX5E_XDP_XMIT_MODE_FRAME
});
111 mlx5e_xdpi_fifo_push(&sq
->db
.xdpi_fifo
,
112 (union mlx5e_xdp_info
) { .frame
.xdpf
= xdpf
});
113 mlx5e_xdpi_fifo_push(&sq
->db
.xdpi_fifo
,
114 (union mlx5e_xdp_info
) { .frame
.dma_addr
= dma_addr
});
118 /* Driver assumes that xdp_convert_buff_to_frame returns an xdp_frame
119 * that points to the same memory region as the original xdp_buff. It
120 * allows to map the memory only once and to use the DMA_BIDIRECTIONAL
124 dma_addr
= page_pool_get_dma_addr(page
) + (xdpf
->data
- (void *)xdpf
);
125 dma_sync_single_for_device(sq
->pdev
, dma_addr
, xdptxd
->len
, DMA_BIDIRECTIONAL
);
127 if (xdptxd
->has_frags
) {
128 xdptxdf
.sinfo
= xdp_get_shared_info_from_frame(xdpf
);
129 xdptxdf
.dma_arr
= NULL
;
131 for (i
= 0; i
< xdptxdf
.sinfo
->nr_frags
; i
++) {
132 skb_frag_t
*frag
= &xdptxdf
.sinfo
->frags
[i
];
136 addr
= page_pool_get_dma_addr(skb_frag_page(frag
)) +
138 len
= skb_frag_size(frag
);
139 dma_sync_single_for_device(sq
->pdev
, addr
, len
,
144 xdptxd
->dma_addr
= dma_addr
;
146 if (unlikely(!INDIRECT_CALL_2(sq
->xmit_xdp_frame
, mlx5e_xmit_xdp_frame_mpwqe
,
147 mlx5e_xmit_xdp_frame
, sq
, xdptxd
, 0)))
150 /* xmit_mode == MLX5E_XDP_XMIT_MODE_PAGE */
151 mlx5e_xdpi_fifo_push(&sq
->db
.xdpi_fifo
,
152 (union mlx5e_xdp_info
) { .mode
= MLX5E_XDP_XMIT_MODE_PAGE
});
154 if (xdptxd
->has_frags
) {
155 mlx5e_xdpi_fifo_push(&sq
->db
.xdpi_fifo
,
156 (union mlx5e_xdp_info
)
157 { .page
.num
= 1 + xdptxdf
.sinfo
->nr_frags
});
158 mlx5e_xdpi_fifo_push(&sq
->db
.xdpi_fifo
,
159 (union mlx5e_xdp_info
) { .page
.page
= page
});
160 for (i
= 0; i
< xdptxdf
.sinfo
->nr_frags
; i
++) {
161 skb_frag_t
*frag
= &xdptxdf
.sinfo
->frags
[i
];
163 mlx5e_xdpi_fifo_push(&sq
->db
.xdpi_fifo
,
164 (union mlx5e_xdp_info
)
165 { .page
.page
= skb_frag_page(frag
) });
168 mlx5e_xdpi_fifo_push(&sq
->db
.xdpi_fifo
,
169 (union mlx5e_xdp_info
) { .page
.num
= 1 });
170 mlx5e_xdpi_fifo_push(&sq
->db
.xdpi_fifo
,
171 (union mlx5e_xdp_info
) { .page
.page
= page
});
177 static int mlx5e_xdp_rx_timestamp(const struct xdp_md
*ctx
, u64
*timestamp
)
179 const struct mlx5e_xdp_buff
*_ctx
= (void *)ctx
;
181 if (unlikely(!mlx5e_rx_hw_stamp(_ctx
->rq
->tstamp
)))
184 *timestamp
= mlx5e_cqe_ts_to_ns(_ctx
->rq
->ptp_cyc2time
,
185 _ctx
->rq
->clock
, get_cqe_ts(_ctx
->cqe
));
189 /* Mapping HW RSS Type bits CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4 into 4-bits*/
190 #define RSS_TYPE_MAX_TABLE 16 /* 4-bits max 16 entries */
191 #define RSS_L4 GENMASK(1, 0)
192 #define RSS_L3 GENMASK(3, 2) /* Same as CQE_RSS_HTYPE_IP */
194 /* Valid combinations of CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4 sorted numerical */
195 enum mlx5_rss_hash_type
{
196 RSS_TYPE_NO_HASH
= (FIELD_PREP_CONST(RSS_L3
, CQE_RSS_IP_NONE
) |
197 FIELD_PREP_CONST(RSS_L4
, CQE_RSS_L4_NONE
)),
198 RSS_TYPE_L3_IPV4
= (FIELD_PREP_CONST(RSS_L3
, CQE_RSS_IPV4
) |
199 FIELD_PREP_CONST(RSS_L4
, CQE_RSS_L4_NONE
)),
200 RSS_TYPE_L4_IPV4_TCP
= (FIELD_PREP_CONST(RSS_L3
, CQE_RSS_IPV4
) |
201 FIELD_PREP_CONST(RSS_L4
, CQE_RSS_L4_TCP
)),
202 RSS_TYPE_L4_IPV4_UDP
= (FIELD_PREP_CONST(RSS_L3
, CQE_RSS_IPV4
) |
203 FIELD_PREP_CONST(RSS_L4
, CQE_RSS_L4_UDP
)),
204 RSS_TYPE_L4_IPV4_IPSEC
= (FIELD_PREP_CONST(RSS_L3
, CQE_RSS_IPV4
) |
205 FIELD_PREP_CONST(RSS_L4
, CQE_RSS_L4_IPSEC
)),
206 RSS_TYPE_L3_IPV6
= (FIELD_PREP_CONST(RSS_L3
, CQE_RSS_IPV6
) |
207 FIELD_PREP_CONST(RSS_L4
, CQE_RSS_L4_NONE
)),
208 RSS_TYPE_L4_IPV6_TCP
= (FIELD_PREP_CONST(RSS_L3
, CQE_RSS_IPV6
) |
209 FIELD_PREP_CONST(RSS_L4
, CQE_RSS_L4_TCP
)),
210 RSS_TYPE_L4_IPV6_UDP
= (FIELD_PREP_CONST(RSS_L3
, CQE_RSS_IPV6
) |
211 FIELD_PREP_CONST(RSS_L4
, CQE_RSS_L4_UDP
)),
212 RSS_TYPE_L4_IPV6_IPSEC
= (FIELD_PREP_CONST(RSS_L3
, CQE_RSS_IPV6
) |
213 FIELD_PREP_CONST(RSS_L4
, CQE_RSS_L4_IPSEC
)),
216 /* Invalid combinations will simply return zero, allows no boundary checks */
217 static const enum xdp_rss_hash_type mlx5_xdp_rss_type
[RSS_TYPE_MAX_TABLE
] = {
218 [RSS_TYPE_NO_HASH
] = XDP_RSS_TYPE_NONE
,
219 [1] = XDP_RSS_TYPE_NONE
, /* Implicit zero */
220 [2] = XDP_RSS_TYPE_NONE
, /* Implicit zero */
221 [3] = XDP_RSS_TYPE_NONE
, /* Implicit zero */
222 [RSS_TYPE_L3_IPV4
] = XDP_RSS_TYPE_L3_IPV4
,
223 [RSS_TYPE_L4_IPV4_TCP
] = XDP_RSS_TYPE_L4_IPV4_TCP
,
224 [RSS_TYPE_L4_IPV4_UDP
] = XDP_RSS_TYPE_L4_IPV4_UDP
,
225 [RSS_TYPE_L4_IPV4_IPSEC
] = XDP_RSS_TYPE_L4_IPV4_IPSEC
,
226 [RSS_TYPE_L3_IPV6
] = XDP_RSS_TYPE_L3_IPV6
,
227 [RSS_TYPE_L4_IPV6_TCP
] = XDP_RSS_TYPE_L4_IPV6_TCP
,
228 [RSS_TYPE_L4_IPV6_UDP
] = XDP_RSS_TYPE_L4_IPV6_UDP
,
229 [RSS_TYPE_L4_IPV6_IPSEC
] = XDP_RSS_TYPE_L4_IPV6_IPSEC
,
230 [12] = XDP_RSS_TYPE_NONE
, /* Implicit zero */
231 [13] = XDP_RSS_TYPE_NONE
, /* Implicit zero */
232 [14] = XDP_RSS_TYPE_NONE
, /* Implicit zero */
233 [15] = XDP_RSS_TYPE_NONE
, /* Implicit zero */
236 static int mlx5e_xdp_rx_hash(const struct xdp_md
*ctx
, u32
*hash
,
237 enum xdp_rss_hash_type
*rss_type
)
239 const struct mlx5e_xdp_buff
*_ctx
= (void *)ctx
;
240 const struct mlx5_cqe64
*cqe
= _ctx
->cqe
;
241 u32 hash_type
, l4_type
, ip_type
, lookup
;
243 if (unlikely(!(_ctx
->xdp
.rxq
->dev
->features
& NETIF_F_RXHASH
)))
246 *hash
= be32_to_cpu(cqe
->rss_hash_result
);
248 hash_type
= cqe
->rss_hash_type
;
249 BUILD_BUG_ON(CQE_RSS_HTYPE_IP
!= RSS_L3
); /* same mask */
250 ip_type
= hash_type
& CQE_RSS_HTYPE_IP
;
251 l4_type
= FIELD_GET(CQE_RSS_HTYPE_L4
, hash_type
);
252 lookup
= ip_type
| l4_type
;
253 *rss_type
= mlx5_xdp_rss_type
[lookup
];
258 const struct xdp_metadata_ops mlx5e_xdp_metadata_ops
= {
259 .xmo_rx_timestamp
= mlx5e_xdp_rx_timestamp
,
260 .xmo_rx_hash
= mlx5e_xdp_rx_hash
,
263 /* returns true if packet was consumed by xdp */
264 bool mlx5e_xdp_handle(struct mlx5e_rq
*rq
,
265 struct bpf_prog
*prog
, struct mlx5e_xdp_buff
*mxbuf
)
267 struct xdp_buff
*xdp
= &mxbuf
->xdp
;
271 act
= bpf_prog_run_xdp(prog
, xdp
);
276 if (unlikely(!mlx5e_xmit_xdp_buff(rq
->xdpsq
, rq
, xdp
)))
278 __set_bit(MLX5E_RQ_FLAG_XDP_XMIT
, rq
->flags
); /* non-atomic */
281 /* When XDP enabled then page-refcnt==1 here */
282 err
= xdp_do_redirect(rq
->netdev
, xdp
, prog
);
285 __set_bit(MLX5E_RQ_FLAG_XDP_XMIT
, rq
->flags
);
286 __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT
, rq
->flags
);
287 rq
->stats
->xdp_redirect
++;
290 bpf_warn_invalid_xdp_action(rq
->netdev
, prog
, act
);
294 trace_xdp_exception(rq
->netdev
, prog
, act
);
297 rq
->stats
->xdp_drop
++;
302 static u16
mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq
*sq
, u16 size
)
304 struct mlx5_wq_cyc
*wq
= &sq
->wq
;
305 u16 pi
, contig_wqebbs
;
307 pi
= mlx5_wq_cyc_ctr2ix(wq
, sq
->pc
);
308 contig_wqebbs
= mlx5_wq_cyc_get_contig_wqebbs(wq
, pi
);
309 if (unlikely(contig_wqebbs
< size
)) {
310 struct mlx5e_xdp_wqe_info
*wi
, *edge_wi
;
312 wi
= &sq
->db
.wqe_info
[pi
];
313 edge_wi
= wi
+ contig_wqebbs
;
315 /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
316 for (; wi
< edge_wi
; wi
++) {
317 *wi
= (struct mlx5e_xdp_wqe_info
) {
321 mlx5e_post_nop(wq
, sq
->sqn
, &sq
->pc
);
323 sq
->stats
->nops
+= contig_wqebbs
;
325 pi
= mlx5_wq_cyc_ctr2ix(wq
, sq
->pc
);
331 static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq
*sq
)
333 struct mlx5e_tx_mpwqe
*session
= &sq
->mpwqe
;
334 struct mlx5e_xdpsq_stats
*stats
= sq
->stats
;
335 struct mlx5e_tx_wqe
*wqe
;
338 pi
= mlx5e_xdpsq_get_next_pi(sq
, sq
->max_sq_mpw_wqebbs
);
339 wqe
= MLX5E_TX_FETCH_WQE(sq
, pi
);
340 net_prefetchw(wqe
->data
);
342 *session
= (struct mlx5e_tx_mpwqe
) {
345 .ds_count
= MLX5E_TX_WQE_EMPTY_DS_COUNT
,
347 .inline_on
= mlx5e_xdp_get_inline_state(sq
, session
->inline_on
),
353 void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq
*sq
)
355 struct mlx5_wq_cyc
*wq
= &sq
->wq
;
356 struct mlx5e_tx_mpwqe
*session
= &sq
->mpwqe
;
357 struct mlx5_wqe_ctrl_seg
*cseg
= &session
->wqe
->ctrl
;
358 u16 ds_count
= session
->ds_count
;
359 u16 pi
= mlx5_wq_cyc_ctr2ix(wq
, sq
->pc
);
360 struct mlx5e_xdp_wqe_info
*wi
= &sq
->db
.wqe_info
[pi
];
362 cseg
->opmod_idx_opcode
=
363 cpu_to_be32((sq
->pc
<< 8) | MLX5_OPCODE_ENHANCED_MPSW
);
364 cseg
->qpn_ds
= cpu_to_be32((sq
->sqn
<< 8) | ds_count
);
366 wi
->num_wqebbs
= DIV_ROUND_UP(ds_count
, MLX5_SEND_WQEBB_NUM_DS
);
367 wi
->num_pkts
= session
->pkt_count
;
369 sq
->pc
+= wi
->num_wqebbs
;
371 sq
->doorbell_cseg
= cseg
;
373 session
->wqe
= NULL
; /* Close session */
377 MLX5E_XDP_CHECK_OK
= 1,
378 MLX5E_XDP_CHECK_START_MPWQE
= 2,
381 INDIRECT_CALLABLE_SCOPE
int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq
*sq
)
383 if (unlikely(!sq
->mpwqe
.wqe
)) {
384 if (unlikely(!mlx5e_wqc_has_room_for(&sq
->wq
, sq
->cc
, sq
->pc
,
386 /* SQ is full, ring doorbell */
387 mlx5e_xmit_xdp_doorbell(sq
);
392 return MLX5E_XDP_CHECK_START_MPWQE
;
395 return MLX5E_XDP_CHECK_OK
;
398 INDIRECT_CALLABLE_SCOPE
bool
399 mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq
*sq
, struct mlx5e_xmit_data
*xdptxd
,
402 INDIRECT_CALLABLE_SCOPE
bool
403 mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq
*sq
, struct mlx5e_xmit_data
*xdptxd
,
406 struct mlx5e_tx_mpwqe
*session
= &sq
->mpwqe
;
407 struct mlx5e_xdpsq_stats
*stats
= sq
->stats
;
409 if (xdptxd
->has_frags
) {
410 /* MPWQE is enabled, but a multi-buffer packet is queued for
411 * transmission. MPWQE can't send fragmented packets, so close
412 * the current session and fall back to a regular WQE.
414 if (unlikely(sq
->mpwqe
.wqe
))
415 mlx5e_xdp_mpwqe_complete(sq
);
416 return mlx5e_xmit_xdp_frame(sq
, xdptxd
, 0);
419 if (unlikely(xdptxd
->len
> sq
->hw_mtu
)) {
425 check_result
= mlx5e_xmit_xdp_frame_check_mpwqe(sq
);
426 if (unlikely(check_result
< 0))
429 if (check_result
== MLX5E_XDP_CHECK_START_MPWQE
) {
430 /* Start the session when nothing can fail, so it's guaranteed
431 * that if there is an active session, it has at least one dseg,
432 * and it's safe to complete it at any time.
434 mlx5e_xdp_mpwqe_session_start(sq
);
437 mlx5e_xdp_mpwqe_add_dseg(sq
, xdptxd
, stats
);
439 if (unlikely(mlx5e_xdp_mpwqe_is_full(session
, sq
->max_sq_mpw_wqebbs
)))
440 mlx5e_xdp_mpwqe_complete(sq
);
446 static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq
*sq
, int stop_room
)
448 if (unlikely(!mlx5e_wqc_has_room_for(&sq
->wq
, sq
->cc
, sq
->pc
, stop_room
))) {
449 /* SQ is full, ring doorbell */
450 mlx5e_xmit_xdp_doorbell(sq
);
455 return MLX5E_XDP_CHECK_OK
;
458 INDIRECT_CALLABLE_SCOPE
int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq
*sq
)
460 return mlx5e_xmit_xdp_frame_check_stop_room(sq
, 1);
463 INDIRECT_CALLABLE_SCOPE
bool
464 mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq
*sq
, struct mlx5e_xmit_data
*xdptxd
,
467 struct mlx5e_xmit_data_frags
*xdptxdf
=
468 container_of(xdptxd
, struct mlx5e_xmit_data_frags
, xd
);
469 struct mlx5_wq_cyc
*wq
= &sq
->wq
;
470 struct mlx5_wqe_ctrl_seg
*cseg
;
471 struct mlx5_wqe_data_seg
*dseg
;
472 struct mlx5_wqe_eth_seg
*eseg
;
473 struct mlx5e_tx_wqe
*wqe
;
475 dma_addr_t dma_addr
= xdptxd
->dma_addr
;
476 u32 dma_len
= xdptxd
->len
;
477 u16 ds_cnt
, inline_hdr_sz
;
482 struct mlx5e_xdpsq_stats
*stats
= sq
->stats
;
484 if (unlikely(dma_len
< MLX5E_XDP_MIN_INLINE
|| sq
->hw_mtu
< dma_len
)) {
489 ds_cnt
= MLX5E_TX_WQE_EMPTY_DS_COUNT
+ 1;
490 if (sq
->min_inline_mode
!= MLX5_INLINE_MODE_NONE
)
493 /* check_result must be 0 if sinfo is passed. */
497 if (xdptxd
->has_frags
) {
498 ds_cnt
+= xdptxdf
->sinfo
->nr_frags
;
499 num_frags
= xdptxdf
->sinfo
->nr_frags
;
500 num_wqebbs
= DIV_ROUND_UP(ds_cnt
, MLX5_SEND_WQEBB_NUM_DS
);
501 /* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
502 * enough to hold all fragments.
504 stop_room
= MLX5E_STOP_ROOM(num_wqebbs
);
507 check_result
= mlx5e_xmit_xdp_frame_check_stop_room(sq
, stop_room
);
509 if (unlikely(check_result
< 0))
512 pi
= mlx5e_xdpsq_get_next_pi(sq
, num_wqebbs
);
513 wqe
= mlx5_wq_cyc_get_wqe(wq
, pi
);
522 /* copy the inline part if required */
523 if (sq
->min_inline_mode
!= MLX5_INLINE_MODE_NONE
) {
524 memcpy(eseg
->inline_hdr
.start
, xdptxd
->data
, sizeof(eseg
->inline_hdr
.start
));
525 memcpy(dseg
, xdptxd
->data
+ sizeof(eseg
->inline_hdr
.start
),
526 MLX5E_XDP_MIN_INLINE
- sizeof(eseg
->inline_hdr
.start
));
527 dma_len
-= MLX5E_XDP_MIN_INLINE
;
528 dma_addr
+= MLX5E_XDP_MIN_INLINE
;
529 inline_hdr_sz
= MLX5E_XDP_MIN_INLINE
;
533 /* write the dma part */
534 dseg
->addr
= cpu_to_be64(dma_addr
);
535 dseg
->byte_count
= cpu_to_be32(dma_len
);
537 cseg
->opmod_idx_opcode
= cpu_to_be32((sq
->pc
<< 8) | MLX5_OPCODE_SEND
);
539 if (test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF
, &sq
->state
)) {
542 memset(&cseg
->trailer
, 0, sizeof(cseg
->trailer
));
543 memset(eseg
, 0, sizeof(*eseg
) - sizeof(eseg
->trailer
));
545 eseg
->inline_hdr
.sz
= cpu_to_be16(inline_hdr_sz
);
546 dseg
->lkey
= sq
->mkey_be
;
548 for (i
= 0; i
< num_frags
; i
++) {
549 skb_frag_t
*frag
= &xdptxdf
->sinfo
->frags
[i
];
552 addr
= xdptxdf
->dma_arr
? xdptxdf
->dma_arr
[i
] :
553 page_pool_get_dma_addr(skb_frag_page(frag
)) +
557 dseg
->addr
= cpu_to_be64(addr
);
558 dseg
->byte_count
= cpu_to_be32(skb_frag_size(frag
));
559 dseg
->lkey
= sq
->mkey_be
;
562 cseg
->qpn_ds
= cpu_to_be32((sq
->sqn
<< 8) | ds_cnt
);
564 sq
->db
.wqe_info
[pi
] = (struct mlx5e_xdp_wqe_info
) {
565 .num_wqebbs
= num_wqebbs
,
569 sq
->pc
+= num_wqebbs
;
576 sq
->doorbell_cseg
= cseg
;
582 static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq
*sq
,
583 struct mlx5e_xdp_wqe_info
*wi
,
585 struct xdp_frame_bulk
*bq
)
587 struct mlx5e_xdp_info_fifo
*xdpi_fifo
= &sq
->db
.xdpi_fifo
;
590 for (i
= 0; i
< wi
->num_pkts
; i
++) {
591 union mlx5e_xdp_info xdpi
= mlx5e_xdpi_fifo_pop(xdpi_fifo
);
594 case MLX5E_XDP_XMIT_MODE_FRAME
: {
595 /* XDP_TX from the XSK RQ and XDP_REDIRECT */
596 struct xdp_frame
*xdpf
;
599 xdpi
= mlx5e_xdpi_fifo_pop(xdpi_fifo
);
600 xdpf
= xdpi
.frame
.xdpf
;
601 xdpi
= mlx5e_xdpi_fifo_pop(xdpi_fifo
);
602 dma_addr
= xdpi
.frame
.dma_addr
;
604 dma_unmap_single(sq
->pdev
, dma_addr
,
605 xdpf
->len
, DMA_TO_DEVICE
);
606 if (xdp_frame_has_frags(xdpf
)) {
607 struct skb_shared_info
*sinfo
;
610 sinfo
= xdp_get_shared_info_from_frame(xdpf
);
611 for (j
= 0; j
< sinfo
->nr_frags
; j
++) {
612 skb_frag_t
*frag
= &sinfo
->frags
[j
];
614 xdpi
= mlx5e_xdpi_fifo_pop(xdpi_fifo
);
615 dma_addr
= xdpi
.frame
.dma_addr
;
617 dma_unmap_single(sq
->pdev
, dma_addr
,
618 skb_frag_size(frag
), DMA_TO_DEVICE
);
621 xdp_return_frame_bulk(xdpf
, bq
);
624 case MLX5E_XDP_XMIT_MODE_PAGE
: {
625 /* XDP_TX from the regular RQ */
628 xdpi
= mlx5e_xdpi_fifo_pop(xdpi_fifo
);
634 xdpi
= mlx5e_xdpi_fifo_pop(xdpi_fifo
);
635 page
= xdpi
.page
.page
;
637 /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE)
638 * as we know this is a page_pool page.
640 page_pool_put_defragged_page(page
->pp
,
646 case MLX5E_XDP_XMIT_MODE_XSK
:
656 bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq
*cq
)
658 struct xdp_frame_bulk bq
;
659 struct mlx5e_xdpsq
*sq
;
660 struct mlx5_cqe64
*cqe
;
665 xdp_frame_bulk_init(&bq
);
667 sq
= container_of(cq
, struct mlx5e_xdpsq
, cq
);
669 if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED
, &sq
->state
)))
672 cqe
= mlx5_cqwq_get_cqe(&cq
->wq
);
676 /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
677 * otherwise a cq overrun may occur
683 struct mlx5e_xdp_wqe_info
*wi
;
687 mlx5_cqwq_pop(&cq
->wq
);
689 wqe_counter
= be16_to_cpu(cqe
->wqe_counter
);
692 last_wqe
= (sqcc
== wqe_counter
);
693 ci
= mlx5_wq_cyc_ctr2ix(&sq
->wq
, sqcc
);
694 wi
= &sq
->db
.wqe_info
[ci
];
696 sqcc
+= wi
->num_wqebbs
;
698 mlx5e_free_xdpsq_desc(sq
, wi
, &xsk_frames
, &bq
);
701 if (unlikely(get_cqe_opcode(cqe
) != MLX5_CQE_REQ
)) {
702 netdev_WARN_ONCE(sq
->channel
->netdev
,
703 "Bad OP in XDPSQ CQE: 0x%x\n",
704 get_cqe_opcode(cqe
));
705 mlx5e_dump_error_cqe(&sq
->cq
, sq
->sqn
,
706 (struct mlx5_err_cqe
*)cqe
);
707 mlx5_wq_cyc_wqe_dump(&sq
->wq
, ci
, wi
->num_wqebbs
);
709 } while ((++i
< MLX5E_TX_CQ_POLL_BUDGET
) && (cqe
= mlx5_cqwq_get_cqe(&cq
->wq
)));
711 xdp_flush_frame_bulk(&bq
);
714 xsk_tx_completed(sq
->xsk_pool
, xsk_frames
);
716 sq
->stats
->cqes
+= i
;
718 mlx5_cqwq_update_db_record(&cq
->wq
);
720 /* ensure cq space is freed before enabling more cqes */
724 return (i
== MLX5E_TX_CQ_POLL_BUDGET
);
727 void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq
*sq
)
729 struct xdp_frame_bulk bq
;
732 xdp_frame_bulk_init(&bq
);
734 rcu_read_lock(); /* need for xdp_return_frame_bulk */
736 while (sq
->cc
!= sq
->pc
) {
737 struct mlx5e_xdp_wqe_info
*wi
;
740 ci
= mlx5_wq_cyc_ctr2ix(&sq
->wq
, sq
->cc
);
741 wi
= &sq
->db
.wqe_info
[ci
];
743 sq
->cc
+= wi
->num_wqebbs
;
745 mlx5e_free_xdpsq_desc(sq
, wi
, &xsk_frames
, &bq
);
748 xdp_flush_frame_bulk(&bq
);
752 xsk_tx_completed(sq
->xsk_pool
, xsk_frames
);
755 int mlx5e_xdp_xmit(struct net_device
*dev
, int n
, struct xdp_frame
**frames
,
758 struct mlx5e_priv
*priv
= netdev_priv(dev
);
759 struct mlx5e_xdpsq
*sq
;
764 /* this flag is sufficient, no need to test internal sq state */
765 if (unlikely(!mlx5e_xdp_tx_is_enabled(priv
)))
768 if (unlikely(flags
& ~XDP_XMIT_FLAGS_MASK
))
771 sq_num
= smp_processor_id();
773 if (unlikely(sq_num
>= priv
->channels
.num
))
776 sq
= &priv
->channels
.c
[sq_num
]->xdpsq
;
778 for (i
= 0; i
< n
; i
++) {
779 struct mlx5e_xmit_data_frags xdptxdf
= {};
780 struct xdp_frame
*xdpf
= frames
[i
];
781 dma_addr_t dma_arr
[MAX_SKB_FRAGS
];
782 struct mlx5e_xmit_data
*xdptxd
;
785 xdptxd
= &xdptxdf
.xd
;
786 xdptxd
->data
= xdpf
->data
;
787 xdptxd
->len
= xdpf
->len
;
788 xdptxd
->has_frags
= xdp_frame_has_frags(xdpf
);
789 xdptxd
->dma_addr
= dma_map_single(sq
->pdev
, xdptxd
->data
,
790 xdptxd
->len
, DMA_TO_DEVICE
);
792 if (unlikely(dma_mapping_error(sq
->pdev
, xdptxd
->dma_addr
)))
795 if (xdptxd
->has_frags
) {
798 xdptxdf
.sinfo
= xdp_get_shared_info_from_frame(xdpf
);
799 xdptxdf
.dma_arr
= dma_arr
;
800 for (j
= 0; j
< xdptxdf
.sinfo
->nr_frags
; j
++) {
801 skb_frag_t
*frag
= &xdptxdf
.sinfo
->frags
[j
];
803 dma_arr
[j
] = dma_map_single(sq
->pdev
, skb_frag_address(frag
),
804 skb_frag_size(frag
), DMA_TO_DEVICE
);
806 if (!dma_mapping_error(sq
->pdev
, dma_arr
[j
]))
810 dma_unmap_single(sq
->pdev
, dma_arr
[j
],
811 skb_frag_size(&xdptxdf
.sinfo
->frags
[j
]),
817 ret
= INDIRECT_CALL_2(sq
->xmit_xdp_frame
, mlx5e_xmit_xdp_frame_mpwqe
,
818 mlx5e_xmit_xdp_frame
, sq
, xdptxd
, 0);
819 if (unlikely(!ret
)) {
822 dma_unmap_single(sq
->pdev
, xdptxd
->dma_addr
,
823 xdptxd
->len
, DMA_TO_DEVICE
);
824 if (!xdptxd
->has_frags
)
826 for (j
= 0; j
< xdptxdf
.sinfo
->nr_frags
; j
++)
827 dma_unmap_single(sq
->pdev
, dma_arr
[j
],
828 skb_frag_size(&xdptxdf
.sinfo
->frags
[j
]),
833 /* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */
834 mlx5e_xdpi_fifo_push(&sq
->db
.xdpi_fifo
,
835 (union mlx5e_xdp_info
) { .mode
= MLX5E_XDP_XMIT_MODE_FRAME
});
836 mlx5e_xdpi_fifo_push(&sq
->db
.xdpi_fifo
,
837 (union mlx5e_xdp_info
) { .frame
.xdpf
= xdpf
});
838 mlx5e_xdpi_fifo_push(&sq
->db
.xdpi_fifo
,
839 (union mlx5e_xdp_info
) { .frame
.dma_addr
= xdptxd
->dma_addr
});
840 if (xdptxd
->has_frags
) {
843 for (j
= 0; j
< xdptxdf
.sinfo
->nr_frags
; j
++)
844 mlx5e_xdpi_fifo_push(&sq
->db
.xdpi_fifo
,
845 (union mlx5e_xdp_info
)
846 { .frame
.dma_addr
= dma_arr
[j
] });
852 if (flags
& XDP_XMIT_FLUSH
) {
854 mlx5e_xdp_mpwqe_complete(sq
);
855 mlx5e_xmit_xdp_doorbell(sq
);
861 void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq
*rq
)
863 struct mlx5e_xdpsq
*xdpsq
= rq
->xdpsq
;
865 if (xdpsq
->mpwqe
.wqe
)
866 mlx5e_xdp_mpwqe_complete(xdpsq
);
868 mlx5e_xmit_xdp_doorbell(xdpsq
);
870 if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT
, rq
->flags
)) {
872 __clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT
, rq
->flags
);
876 void mlx5e_set_xmit_fp(struct mlx5e_xdpsq
*sq
, bool is_mpw
)
878 sq
->xmit_xdp_frame_check
= is_mpw
?
879 mlx5e_xmit_xdp_frame_check_mpwqe
: mlx5e_xmit_xdp_frame_check
;
880 sq
->xmit_xdp_frame
= is_mpw
?
881 mlx5e_xmit_xdp_frame_mpwqe
: mlx5e_xmit_xdp_frame
;