// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include "dr_types.h"

#define SIGNAL_PER_DIV_QUEUE 16
#define TH_NUMS_TO_DRAIN 2

enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
struct dr_data_seg {
        u64 addr;
        u32 length;
        u32 lkey;
        unsigned int send_flags;
};

struct postsend_info {
        struct dr_data_seg write;
        struct dr_data_seg read;
        u64 remote_addr;
        u32 rkey;
};

struct dr_qp_rtr_attr {
        struct mlx5dr_cmd_gid_attr dgid_attr;
        enum ib_mtu mtu;
        u32 qp_num;
        u16 port_num;
        u8 min_rnr_timer;
        u8 sgid_index;
        u16 udp_src_port;
};

struct dr_qp_rts_attr {
        u8 timeout;
        u8 retry_cnt;
        u8 rnr_retry;
};

struct dr_qp_init_attr {
        u32 cqn;
        u32 pdn;
        u32 max_send_wr;
        struct mlx5_uars_page *uar;
};
static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
{
        unsigned int idx;
        u8 opcode;

        opcode = get_cqe_opcode(cqe64);
        if (opcode == MLX5_CQE_REQ_ERR) {
                idx = be16_to_cpu(cqe64->wqe_counter) &
                        (dr_cq->qp->sq.wqe_cnt - 1);
                dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
        } else if (opcode == MLX5_CQE_RESP_ERR) {
                ++dr_cq->qp->sq.cc;
        } else {
                idx = be16_to_cpu(cqe64->wqe_counter) &
                        (dr_cq->qp->sq.wqe_cnt - 1);
                dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;

                return CQ_OK;
        }

        return CQ_POLL_ERR;
}

static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
{
        struct mlx5_cqe64 *cqe64;
        int err;

        cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
        if (!cqe64)
                return CQ_EMPTY;

        mlx5_cqwq_pop(&dr_cq->wq);
        err = dr_parse_cqe(dr_cq, cqe64);
        mlx5_cqwq_update_db_record(&dr_cq->wq);

        return err;
}

static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
{
        int npolled;
        int err = 0;

        for (npolled = 0; npolled < ne; ++npolled) {
                err = dr_cq_poll_one(dr_cq);
                if (err != CQ_OK)
                        break;
        }

        return err == CQ_POLL_ERR ? err : npolled;
}

static void dr_qp_event(struct mlx5_core_qp *mqp, int event)
{
        pr_info("DR QP event %u on QP #%u\n", event, mqp->qpn);
}
static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
                                         struct dr_qp_init_attr *attr)
{
        u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
        struct mlx5_wq_param wqp;
        struct mlx5dr_qp *dr_qp;
        int inlen;
        void *qpc;
        void *in;
        int err;

        dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
        if (!dr_qp)
                return NULL;

        wqp.buf_numa_node = mdev->priv.numa_node;
        wqp.db_numa_node = mdev->priv.numa_node;

        dr_qp->rq.pc = 0;
        dr_qp->rq.cc = 0;
        dr_qp->rq.wqe_cnt = 4;
        dr_qp->sq.pc = 0;
        dr_qp->sq.cc = 0;
        dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);

        MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
        MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
        MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
        err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
                                &dr_qp->wq_ctrl);
        if (err) {
                mlx5_core_warn(mdev, "Can't create QP WQ\n");
                goto err_wq;
        }

        dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
                                     sizeof(dr_qp->sq.wqe_head[0]),
                                     GFP_KERNEL);
        if (!dr_qp->sq.wqe_head) {
                mlx5_core_warn(mdev, "Can't allocate wqe head\n");
                goto err_wqe_head;
        }

        inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
                MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
                dr_qp->wq_ctrl.buf.npages;
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_in;
        }

        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
        MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
        MLX5_SET(qpc, qpc, pd, attr->pdn);
        MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
        MLX5_SET(qpc, qpc, log_page_size,
                 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET(qpc, qpc, fre, 1);
        MLX5_SET(qpc, qpc, rlky, 1);
        MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
        MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
        MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
        MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
        MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
        MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
        MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
        if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
                MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
        mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
                                  (__be64 *)MLX5_ADDR_OF(create_qp_in,
                                                         in, pas));

        err = mlx5_core_create_qp(mdev, &dr_qp->mqp, in, inlen);
        kvfree(in);

        if (err) {
                mlx5_core_warn(mdev, "Can't create QP\n");
                goto err_in;
        }
        dr_qp->mqp.event = dr_qp_event;
        dr_qp->uar = attr->uar;

        return dr_qp;

err_in:
        kfree(dr_qp->sq.wqe_head);
err_wqe_head:
        mlx5_wq_destroy(&dr_qp->wq_ctrl);
err_wq:
        kfree(dr_qp);
        return NULL;
}
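/* Sizing note for dr_create_rc_qp() above: the RQ stride is a single
 * MLX5_SEND_WQE_DS (16 bytes), so log_rq_stride follows the QPC convention
 * of "log2(stride in bytes) - 4", i.e. ilog2(16) - 4 = 0. The SQ depth is
 * rounded up to a power of two so the producer counter can simply be masked
 * with (wqe_cnt - 1) when indexing wqe_head.
 */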
static void dr_destroy_qp(struct mlx5_core_dev *mdev,
                          struct mlx5dr_qp *dr_qp)
{
        mlx5_core_destroy_qp(mdev, &dr_qp->mqp);
        kfree(dr_qp->sq.wqe_head);
        mlx5_wq_destroy(&dr_qp->wq_ctrl);
        kfree(dr_qp);
}

static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
{
        dma_wmb();
        *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff);

        /* After the wmb() the HW is aware of the new work */
        wmb();

        mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
}
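/* Doorbell ordering in dr_cmd_notify_hw() above: the first barrier makes the
 * newly built WQE visible in memory before the SQ doorbell record is
 * updated, and the second barrier orders the doorbell record update against
 * the UAR/BlueFlame write that tells the HW to fetch the new work.
 */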
static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
                             u32 rkey, struct dr_data_seg *data_seg,
                             u32 opcode, int nreq)
{
        struct mlx5_wqe_raddr_seg *wq_raddr;
        struct mlx5_wqe_ctrl_seg *wq_ctrl;
        struct mlx5_wqe_data_seg *wq_dseg;
        unsigned int size;
        unsigned int idx;

        size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
                sizeof(*wq_raddr) / 16;

        idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);

        wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
        wq_ctrl->imm = 0;
        wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
                MLX5_WQE_CTRL_CQ_UPDATE : 0;
        wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
                                                opcode);
        wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->mqp.qpn << 8);
        wq_raddr = (void *)(wq_ctrl + 1);
        wq_raddr->raddr = cpu_to_be64(remote_addr);
        wq_raddr->rkey = cpu_to_be32(rkey);
        wq_raddr->reserved = 0;

        wq_dseg = (void *)(wq_raddr + 1);
        wq_dseg->byte_count = cpu_to_be32(data_seg->length);
        wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
        wq_dseg->addr = cpu_to_be64(data_seg->addr);

        dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;

        if (nreq)
                dr_cmd_notify_hw(dr_qp, wq_ctrl);
}
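/* WQE layout produced by dr_rdma_segments() above: a control segment, a
 * remote-address segment and a single data segment, back to back. Each
 * segment is 16 bytes, so "size" evaluates to 3, which is the ds count
 * (in 16-byte units) encoded into qpn_ds.
 */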
static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
{
        dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
                         &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0);
        dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
                         &send_info->read, MLX5_OPCODE_RDMA_READ, 1);
}
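/* Note that dr_post_send() pairs every RDMA_WRITE with an RDMA_READ from the
 * same remote address back into the write area (see dr_fill_data_segs()
 * below). Since the READ is posted after the WRITE on the same RC QP, its
 * completion implies the device has already processed the WRITE.
 */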
/* mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
 * with send_list parameters:
 *
 *     @ste:       The STE that this data belongs to
 *     @size:      Size of the data to write
 *     @offset:    Offset of the data from the start of the hw_ste entry
 *     @data:      The data to send
 *     @ste_info:  STE info to be sent with send_list
 *     @send_list: The list to append to
 *     @copy_data: If true, the data is copied and kept, since it is not
 *                 backed up anywhere else (e.g. during re-hash).
 *                 If false, the data may still be updated after it has
 *                 been added to the list.
 */
void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
                                               u16 offset, u8 *data,
                                               struct mlx5dr_ste_send_info *ste_info,
                                               struct list_head *send_list,
                                               bool copy_data)
{
        ste_info->size = size;
        ste_info->ste = ste;
        ste_info->offset = offset;

        if (copy_data) {
                memcpy(ste_info->data_cont, data, size);
                ste_info->data = ste_info->data_cont;
        } else {
                ste_info->data = data;
        }

        list_add_tail(&ste_info->send_list, send_list);
}
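/* Hypothetical usage sketch (caller-side, names invented for illustration):
 *
 *      LIST_HEAD(send_list);
 *
 *      mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE_REDUCED, 0,
 *                                                cur_hw_ste, ste_info,
 *                                                &send_list, false);
 *
 * With copy_data == false the caller must keep cur_hw_ste stable until the
 * send_list is flushed; passing true makes the function copy it into
 * ste_info->data_cont instead.
 */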
/* The function normally consumes one wc at a time. However, when the queue
 * is full, which means the HW is a full queue length behind the SW, it
 * drains the CQ until it is empty.
 */
static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
                                struct mlx5dr_send_ring *send_ring)
{
        bool is_drain = false;
        int ne;

        if (send_ring->pending_wqe < send_ring->signal_th)
                return 0;

        /* Queue is full, start draining it */
        if (send_ring->pending_wqe >=
            dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
                is_drain = true;

        do {
                ne = dr_poll_cq(send_ring->cq, 1);
                if (ne < 0)
                        return ne;
                else if (ne == 1)
                        send_ring->pending_wqe -= send_ring->signal_th;
        } while (is_drain && send_ring->pending_wqe);

        return 0;
}
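/* Worked example for the thresholds above, assuming signal_th is 8
 * (max_send_wr / SIGNAL_PER_DIV_QUEUE with a queue of 128): completions
 * start being reaped once pending_wqe reaches 8, and if pending_wqe ever
 * reaches 8 * TH_NUMS_TO_DRAIN = 16 the CQ is drained until pending_wqe
 * drops to zero.
 */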
static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
                              struct postsend_info *send_info)
{
        send_ring->pending_wqe++;

        if (send_ring->pending_wqe % send_ring->signal_th == 0)
                send_info->write.send_flags |= IB_SEND_SIGNALED;

        send_ring->pending_wqe++;
        send_info->read.length = send_info->write.length;
        /* Read into the same write area */
        send_info->read.addr = (uintptr_t)send_info->write.addr;
        send_info->read.lkey = send_ring->mr->mkey.key;

        if (send_ring->pending_wqe % send_ring->signal_th == 0)
                send_info->read.send_flags = IB_SEND_SIGNALED;
        else
                send_info->read.send_flags = 0;
}
static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
                                struct postsend_info *send_info)
{
        struct mlx5dr_send_ring *send_ring = dmn->send_ring;
        u32 buff_offset;
        int ret;

        ret = dr_handle_pending_wc(dmn, send_ring);
        if (ret)
                return ret;

        if (send_info->write.length > dmn->info.max_inline_size) {
                buff_offset = (send_ring->tx_head &
                               (dmn->send_ring->signal_th - 1)) *
                              send_ring->max_post_send_size;
                /* Copy to the ring mr */
                memcpy(send_ring->buf + buff_offset,
                       (void *)(uintptr_t)send_info->write.addr,
                       send_info->write.length);
                send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
                send_info->write.lkey = send_ring->mr->mkey.key;
        }

        send_ring->tx_head++;
        dr_fill_data_segs(send_ring, send_info);
        dr_post_send(send_ring->qp, send_info);

        return 0;
}
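/* Note on the copy path in dr_postsend_icm_data() above: writes larger than
 * max_inline_size are first staged in the registered ring buffer, which is
 * carved into signal_th chunks of max_post_send_size bytes and cycled by
 * tx_head, e.g. (names here are only for illustration):
 *
 *      slot  = send_ring->tx_head & (send_ring->signal_th - 1);
 *      chunk = send_ring->buf + slot * send_ring->max_post_send_size;
 *
 * The WQE is then rewritten to point at the ring MR (mr->dma_addr plus the
 * chunk offset, with mr->mkey.key as lkey) instead of the caller's buffer.
 */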
static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
                                   struct mlx5dr_ste_htbl *htbl,
                                   u8 **data,
                                   u32 *byte_size,
                                   int *iterations,
                                   int *num_stes)
{
        int alloc_size;

        if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) {
                *iterations = htbl->chunk->byte_size /
                        dmn->send_ring->max_post_send_size;
                *byte_size = dmn->send_ring->max_post_send_size;
                alloc_size = *byte_size;
                *num_stes = *byte_size / DR_STE_SIZE;
        } else {
                *iterations = 1;
                *num_stes = htbl->chunk->num_of_entries;
                alloc_size = *num_stes * DR_STE_SIZE;
        }

        *data = kzalloc(alloc_size, GFP_KERNEL);
        if (!*data)
                return -ENOMEM;

        return 0;
}
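/* Worked example for dr_get_tbl_copy_details() above, assuming DR_STE_SIZE
 * is 64 bytes and max_post_send_size is 64K (the 1K-STE chunk configured in
 * mlx5dr_send_ring_alloc() below): a 4K-entry table has a 256K chunk, so
 * *iterations = 256K / 64K = 4, *byte_size = 64K and *num_stes = 64K / 64 =
 * 1024 STEs are sent per iteration. A table whose chunk fits in a single
 * post is sent in one iteration of num_of_entries STEs.
 */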
/* mlx5dr_send_postsend_ste: Write size bytes at offset into the STE's hw icm.
 *
 * @dmn:    Domain
 * @ste:    The STE structure that the data belongs to (at
 *          least part of it)
 * @data:   The data to send
 * @size:   Number of bytes to write
 * @offset: Offset from the start of the ICM mapped data;
 *          allows writing only part of the buffer.
 *
 * Return: 0 on success.
 */
int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
                             u8 *data, u16 size, u16 offset)
{
        struct postsend_info send_info = {};

        send_info.write.addr = (uintptr_t)data;
        send_info.write.length = size;
        send_info.write.lkey = 0;
        send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
        send_info.rkey = ste->htbl->chunk->rkey;

        return dr_postsend_icm_data(dmn, &send_info);
}
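/* Hypothetical usage sketch (buffer name invented for illustration):
 * rewriting the reduced part of a single STE in place would look like
 *
 *      ret = mlx5dr_send_postsend_ste(dmn, ste, hw_ste_buf,
 *                                     DR_STE_SIZE_REDUCED, 0);
 *
 * where hw_ste_buf holds the new STE content and offset 0 starts the write
 * at the beginning of the STE's ICM area.
 */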
int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
                              struct mlx5dr_ste_htbl *htbl,
                              u8 *formatted_ste, u8 *mask)
{
        u32 byte_size = htbl->chunk->byte_size;
        int num_stes_per_iter;
        int iterations;
        u8 *data;
        int ret;
        int i;
        int j;

        ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
                                      &iterations, &num_stes_per_iter);
        if (ret)
                return ret;

        /* Send the data iteration times */
        for (i = 0; i < iterations; i++) {
                u32 ste_index = i * (byte_size / DR_STE_SIZE);
                struct postsend_info send_info = {};

                /* Copy all STEs into the data buffer; the bit_mask needs
                 * to be added as well
                 */
                for (j = 0; j < num_stes_per_iter; j++) {
                        u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste;
                        u32 ste_off = j * DR_STE_SIZE;

                        if (mlx5dr_ste_is_not_valid_entry(hw_ste)) {
                                memcpy(data + ste_off,
                                       formatted_ste, DR_STE_SIZE);
                        } else {
                                /* Copy data */
                                memcpy(data + ste_off,
                                       htbl->ste_arr[ste_index + j].hw_ste,
                                       DR_STE_SIZE_REDUCED);
                                /* Copy bit_mask */
                                memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
                                       mask, DR_STE_SIZE_MASK);
                        }
                }

                send_info.write.addr = (uintptr_t)data;
                send_info.write.length = byte_size;
                send_info.write.lkey = 0;
                send_info.remote_addr =
                        mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
                send_info.rkey = htbl->chunk->rkey;

                ret = dr_postsend_icm_data(dmn, &send_info);
                if (ret)
                        goto out_free;
        }

out_free:
        kfree(data);
        return ret;
}
/* Initialize htbl with default STEs */
int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
                                        struct mlx5dr_ste_htbl *htbl,
                                        u8 *ste_init_data,
                                        bool update_hw_ste)
{
        u32 byte_size = htbl->chunk->byte_size;
        int iterations;
        int num_stes;
        u8 *data;
        int ret;
        int i;

        ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
                                      &iterations, &num_stes);
        if (ret)
                return ret;

        for (i = 0; i < num_stes; i++) {
                u8 *copy_dst;

                /* Copy the same ste on the data buffer */
                copy_dst = data + i * DR_STE_SIZE;
                memcpy(copy_dst, ste_init_data, DR_STE_SIZE);

                if (update_hw_ste) {
                        /* Copy the reduced ste to the hash table ste_arr */
                        copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
                        memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
                }
        }

        /* Send the data iteration times */
        for (i = 0; i < iterations; i++) {
                u32 ste_index = i * (byte_size / DR_STE_SIZE);
                struct postsend_info send_info = {};

                send_info.write.addr = (uintptr_t)data;
                send_info.write.length = byte_size;
                send_info.write.lkey = 0;
                send_info.remote_addr =
                        mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
                send_info.rkey = htbl->chunk->rkey;

                ret = dr_postsend_icm_data(dmn, &send_info);
                if (ret)
                        goto out_free;
        }

out_free:
        kfree(data);
        return ret;
}
int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
                                struct mlx5dr_action *action)
{
        struct postsend_info send_info = {};
        int ret;

        send_info.write.addr = (uintptr_t)action->rewrite.data;
        send_info.write.length = action->rewrite.num_of_actions *
                                 DR_MODIFY_ACTION_SIZE;
        send_info.write.lkey = 0;
        send_info.remote_addr = action->rewrite.chunk->mr_addr;
        send_info.rkey = action->rewrite.chunk->rkey;

        mutex_lock(&dmn->mutex);
        ret = dr_postsend_icm_data(dmn, &send_info);
        mutex_unlock(&dmn->mutex);

        return ret;
}
static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
                                 struct mlx5dr_qp *dr_qp,
                                 int port)
{
        u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
        void *qpc;

        qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);

        MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
        MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
        MLX5_SET(qpc, qpc, rre, 1);
        MLX5_SET(qpc, qpc, rwe, 1);

        return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
                                   &dr_qp->mqp);
}

static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
                                    struct mlx5dr_qp *dr_qp,
                                    struct dr_qp_rts_attr *attr)
{
        u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
        void *qpc;

        qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);

        MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn);

        MLX5_SET(qpc, qpc, log_ack_req_freq, 0);
        MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
        MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);

        return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, qpc,
                                   &dr_qp->mqp);
}

static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
                                     struct mlx5dr_qp *dr_qp,
                                     struct dr_qp_rtr_attr *attr)
{
        u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
        void *qpc;

        qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);

        MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn);

        MLX5_SET(qpc, qpc, mtu, attr->mtu);
        MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
        MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
        memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
               attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
        memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
               attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
        MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
                 attr->sgid_index);

        if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
                MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
                         attr->udp_src_port);

        MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
        MLX5_SET(qpc, qpc, min_rnr_nak, 1);

        return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
                                   &dr_qp->mqp);
}
static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
{
        struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
        struct dr_qp_rts_attr rts_attr = {};
        struct dr_qp_rtr_attr rtr_attr = {};
        enum ib_mtu mtu = IB_MTU_1024;
        u16 gid_index = 0;
        int port = 1;
        int ret;

        /* Init */
        ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
        if (ret) {
                mlx5dr_err(dmn, "Failed modify QP rst2init\n");
                return ret;
        }

        /* RTR */
        ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr);
        if (ret)
                return ret;

        rtr_attr.mtu = mtu;
        rtr_attr.qp_num = dr_qp->mqp.qpn;
        rtr_attr.min_rnr_timer = 12;
        rtr_attr.port_num = port;
        rtr_attr.sgid_index = gid_index;
        rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp;

        ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
        if (ret) {
                mlx5dr_err(dmn, "Failed modify QP init2rtr\n");
                return ret;
        }

        /* RTS */
        rts_attr.timeout = 14;
        rts_attr.retry_cnt = 7;
        rts_attr.rnr_retry = 7;

        ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
        if (ret) {
                mlx5dr_err(dmn, "Failed modify QP rtr2rts\n");
                return ret;
        }

        return 0;
}
static void dr_cq_event(struct mlx5_core_cq *mcq,
                        enum mlx5_event event)
{
        pr_info("CQ event %u on CQ #%u\n", event, mcq->cqn);
}

static void dr_cq_complete(struct mlx5_core_cq *mcq,
                           struct mlx5_eqe *eqe)
{
        pr_err("CQ completion CQ: #%u\n", mcq->cqn);
}
static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
                                      struct mlx5_uars_page *uar,
                                      size_t ncqe)
{
        u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
        u32 out[MLX5_ST_SZ_DW(create_cq_out)];
        struct mlx5_wq_param wqp;
        struct mlx5_cqe64 *cqe;
        struct mlx5dr_cq *cq;
        int inlen, err, eqn;
        unsigned int irqn;
        void *cqc, *in;
        __be64 *pas;
        int vector;
        u32 i;

        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq)
                return NULL;

        ncqe = roundup_pow_of_two(ncqe);
        MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));

        wqp.buf_numa_node = mdev->priv.numa_node;
        wqp.db_numa_node = mdev->priv.numa_node;

        err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
                               &cq->wq_ctrl);
        if (err)
                goto out;

        for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
                cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
                cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
        }

        inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
                sizeof(u64) * cq->wq_ctrl.buf.npages;
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in)
                goto err_cqwq;

        vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
        err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn);
        if (err) {
                kvfree(in);
                goto err_cqwq;
        }

        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
        MLX5_SET(cqc, cqc, c_eqn, eqn);
        MLX5_SET(cqc, cqc, uar_page, uar->index);
        MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
                 MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

        pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
        mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);

        cq->mcq.event = dr_cq_event;
        cq->mcq.comp = dr_cq_complete;

        err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
        kvfree(in);

        if (err)
                goto err_cqwq;

        cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
        cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
        *cq->mcq.set_ci_db = 0;

        /* Set a non-zero value so that the HW will not run db-recovery on a
         * CQ that is used in polling mode.
         */
        *cq->mcq.arm_db = cpu_to_be32(2 << 28);

        cq->mcq.vector = 0;
        cq->mcq.irqn = irqn;
        cq->mcq.uar = uar;

        return cq;

err_cqwq:
        mlx5_wq_destroy(&cq->wq_ctrl);
out:
        kfree(cq);
        return NULL;
}
static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
{
        mlx5_core_destroy_cq(mdev, &cq->mcq);
        mlx5_wq_destroy(&cq->wq_ctrl);
        kfree(cq);
}
static int
dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey)
{
        u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
        void *mkc;

        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
        MLX5_SET(mkc, mkc, a, 1);
        MLX5_SET(mkc, mkc, rw, 1);
        MLX5_SET(mkc, mkc, rr, 1);
        MLX5_SET(mkc, mkc, lw, 1);
        MLX5_SET(mkc, mkc, lr, 1);

        MLX5_SET(mkc, mkc, pd, pdn);
        MLX5_SET(mkc, mkc, length64, 1);
        MLX5_SET(mkc, mkc, qpn, 0xffffff);

        return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
}
static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
                                   u32 pdn, void *buf, size_t size)
{
        struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        struct device *dma_device;
        dma_addr_t dma_addr;
        int err;

        if (!mr)
                return NULL;

        dma_device = &mdev->pdev->dev;
        dma_addr = dma_map_single(dma_device, buf, size,
                                  DMA_BIDIRECTIONAL);
        err = dma_mapping_error(dma_device, dma_addr);
        if (err) {
                mlx5_core_warn(mdev, "Can't dma buf\n");
                kfree(mr);
                return NULL;
        }

        err = dr_create_mkey(mdev, pdn, &mr->mkey);
        if (err) {
                mlx5_core_warn(mdev, "Can't create mkey\n");
                dma_unmap_single(dma_device, dma_addr, size,
                                 DMA_BIDIRECTIONAL);
                kfree(mr);
                return NULL;
        }

        mr->dma_addr = dma_addr;
        mr->size = size;
        mr->addr = buf;

        return mr;
}
static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
{
        mlx5_core_destroy_mkey(mdev, &mr->mkey);
        dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size,
                         DMA_BIDIRECTIONAL);
        kfree(mr);
}
int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
{
        struct dr_qp_init_attr init_attr = {};
        int cq_size;
        int size;
        int ret;

        dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
        if (!dmn->send_ring)
                return -ENOMEM;

        cq_size = QUEUE_SIZE + 1;
        dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
        if (!dmn->send_ring->cq) {
                mlx5dr_err(dmn, "Failed creating CQ\n");
                ret = -ENOMEM;
                goto free_send_ring;
        }

        init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
        init_attr.pdn = dmn->pdn;
        init_attr.uar = dmn->uar;
        init_attr.max_send_wr = QUEUE_SIZE;

        dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
        if (!dmn->send_ring->qp) {
                mlx5dr_err(dmn, "Failed creating QP\n");
                ret = -ENOMEM;
                goto clean_cq;
        }

        dmn->send_ring->cq->qp = dmn->send_ring->qp;

        dmn->info.max_send_wr = QUEUE_SIZE;
        dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
                                        DR_STE_SIZE);

        dmn->send_ring->signal_th = dmn->info.max_send_wr /
                SIGNAL_PER_DIV_QUEUE;

        /* Prepare qp to be used */
        ret = dr_prepare_qp_to_rts(dmn);
        if (ret)
                goto clean_qp;

        dmn->send_ring->max_post_send_size =
                mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
                                                   DR_ICM_TYPE_STE);

        /* Allocating the max size as a buffer for writing */
        size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
        dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
        if (!dmn->send_ring->buf) {
                ret = -ENOMEM;
                goto clean_qp;
        }

        dmn->send_ring->buf_size = size;

        dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
                                       dmn->pdn, dmn->send_ring->buf, size);
        if (!dmn->send_ring->mr) {
                ret = -ENOMEM;
                goto free_mem;
        }

        dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
                                            dmn->pdn, dmn->send_ring->sync_buff,
                                            MIN_READ_SYNC);
        if (!dmn->send_ring->sync_mr) {
                ret = -ENOMEM;
                goto clean_mr;
        }

        return 0;

clean_mr:
        dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
free_mem:
        kfree(dmn->send_ring->buf);
clean_qp:
        dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
clean_cq:
        dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
free_send_ring:
        kfree(dmn->send_ring);

        return ret;
}
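/* Sizing sketch for mlx5dr_send_ring_alloc() above, assuming QUEUE_SIZE is
 * 128: the SQ accepts 128 posts, signal_th = 128 / SIGNAL_PER_DIV_QUEUE = 8,
 * max_post_send_size is the byte size of a 1K-STE ICM chunk, and the staging
 * buffer is signal_th * max_post_send_size bytes so that each of the
 * signal_th outstanding posts gets its own chunk of the buffer.
 */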
void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
                           struct mlx5dr_send_ring *send_ring)
{
        dr_destroy_qp(dmn->mdev, send_ring->qp);
        dr_destroy_cq(dmn->mdev, send_ring->cq);
        dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
        dr_dereg_mr(dmn->mdev, send_ring->mr);
        kfree(send_ring->buf);
        kfree(send_ring);
}
int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
{
        struct mlx5dr_send_ring *send_ring = dmn->send_ring;
        struct postsend_info send_info = {};
        u8 data[DR_STE_SIZE];
        int num_of_sends_req;
        int ret;
        int i;

        /* Sending this number of requests makes sure the drain threshold is reached */
        num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;

        /* Send fake requests, forcing the last one to be signaled */
        send_info.write.addr = (uintptr_t)data;
        send_info.write.length = DR_STE_SIZE;
        send_info.write.lkey = 0;
        /* Using the sync_mr in order to write/read */
        send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
        send_info.rkey = send_ring->sync_mr->mkey.key;

        for (i = 0; i < num_of_sends_req; i++) {
                ret = dr_postsend_icm_data(dmn, &send_info);
                if (ret)
                        return ret;
        }

        ret = dr_handle_pending_wc(dmn, send_ring);

        return ret;
}