// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/dma-mapping.h>
#include <net/addrconf.h>
#include <rdma/uverbs_ioctl.h>

#include "rxe.h"
#include "rxe_queue.h"
#include "rxe_hw_counters.h"

static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr);
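
/*
 * Verbs entry points for the rxe soft-RoCE driver. Each handler converts
 * the ib_* objects passed in by the RDMA core to the driver's rxe_*
 * representations, validates the request and logs any error it returns.
 */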
static int rxe_query_device(struct ib_device *ibdev,
			    struct ib_device_attr *attr,
			    struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(ibdev);
	int err;

	if (udata->inlen || udata->outlen) {
		rxe_dbg_dev(rxe, "malformed udata");
		err = -EINVAL;
		goto err_out;
	}

	memcpy(attr, &rxe->attr, sizeof(*attr));

	return 0;

err_out:
	rxe_err_dev(rxe, "returned err = %d", err);
	return err;
}
static int rxe_query_port(struct ib_device *ibdev,
			  u32 port_num, struct ib_port_attr *attr)
{
	struct rxe_dev *rxe = to_rdev(ibdev);
	int err, ret;

	if (port_num != 1) {
		err = -EINVAL;
		rxe_dbg_dev(rxe, "bad port_num = %d", port_num);
		goto err_out;
	}

	memcpy(attr, &rxe->port.attr, sizeof(*attr));

	mutex_lock(&rxe->usdev_lock);
	ret = ib_get_eth_speed(ibdev, port_num, &attr->active_speed,
			       &attr->active_width);

	if (attr->state == IB_PORT_ACTIVE)
		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
	else if (dev_get_flags(rxe->ndev) & IFF_UP)
		attr->phys_state = IB_PORT_PHYS_STATE_POLLING;
	else
		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;

	mutex_unlock(&rxe->usdev_lock);

	return ret;

err_out:
	rxe_err_dev(rxe, "returned err = %d", err);
	return err;
}
static int rxe_query_pkey(struct ib_device *ibdev,
			  u32 port_num, u16 index, u16 *pkey)
{
	struct rxe_dev *rxe = to_rdev(ibdev);
	int err;

	if (index != 0) {
		err = -EINVAL;
		rxe_dbg_dev(rxe, "bad pkey index = %d", index);
		goto err_out;
	}

	*pkey = IB_DEFAULT_PKEY_FULL;

	return 0;

err_out:
	rxe_err_dev(rxe, "returned err = %d", err);
	return err;
}
static int rxe_modify_device(struct ib_device *ibdev,
			     int mask, struct ib_device_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(ibdev);
	int err;

	if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
		     IB_DEVICE_MODIFY_NODE_DESC)) {
		err = -EOPNOTSUPP;
		rxe_dbg_dev(rxe, "unsupported mask = 0x%x", mask);
		goto err_out;
	}

	if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
		rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);

	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
		memcpy(rxe->ib_dev.node_desc,
		       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
	}

	return 0;

err_out:
	rxe_err_dev(rxe, "returned err = %d", err);
	return err;
}
static int rxe_modify_port(struct ib_device *ibdev, u32 port_num,
			   int mask, struct ib_port_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(ibdev);
	struct rxe_port *port;
	int err;

	if (port_num != 1) {
		err = -EINVAL;
		rxe_dbg_dev(rxe, "bad port_num = %d", port_num);
		goto err_out;
	}

	//TODO is shutdown useful
	if (mask & ~(IB_PORT_RESET_QKEY_CNTR)) {
		err = -EOPNOTSUPP;
		rxe_dbg_dev(rxe, "unsupported mask = 0x%x", mask);
		goto err_out;
	}

	port = &rxe->port;
	port->attr.port_cap_flags |= attr->set_port_cap_mask;
	port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;

	if (mask & IB_PORT_RESET_QKEY_CNTR)
		port->attr.qkey_viol_cntr = 0;

	return 0;

err_out:
	rxe_err_dev(rxe, "returned err = %d", err);
	return err;
}
static enum rdma_link_layer rxe_get_link_layer(struct ib_device *ibdev,
					       u32 port_num)
{
	struct rxe_dev *rxe = to_rdev(ibdev);
	int err;

	if (port_num != 1) {
		err = -EINVAL;
		rxe_dbg_dev(rxe, "bad port_num = %d", port_num);
		goto err_out;
	}

	return IB_LINK_LAYER_ETHERNET;

err_out:
	rxe_err_dev(rxe, "returned err = %d", err);
	return 0;
}
static int rxe_port_immutable(struct ib_device *ibdev, u32 port_num,
			      struct ib_port_immutable *immutable)
{
	struct rxe_dev *rxe = to_rdev(ibdev);
	struct ib_port_attr attr = {};
	int err;

	if (port_num != 1) {
		err = -EINVAL;
		rxe_dbg_dev(rxe, "bad port_num = %d", port_num);
		goto err_out;
	}

	err = ib_query_port(ibdev, port_num, &attr);
	if (err)
		goto err_out;

	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;

err_out:
	rxe_err_dev(rxe, "returned err = %d", err);
	return err;
}
static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(ibuc->device);
	struct rxe_ucontext *uc = to_ruc(ibuc);
	int err;

	err = rxe_add_to_pool(&rxe->uc_pool, uc);
	if (err)
		rxe_err_dev(rxe, "unable to create uc");

	return err;
}
static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
	struct rxe_ucontext *uc = to_ruc(ibuc);
	int err;

	err = rxe_cleanup(uc);
	if (err)
		rxe_err_uc(uc, "cleanup failed, err = %d", err);
}
static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	int err;

	err = rxe_add_to_pool(&rxe->pd_pool, pd);
	if (err) {
		rxe_dbg_dev(rxe, "unable to alloc pd");
		goto err_out;
	}

	return 0;

err_out:
	rxe_err_dev(rxe, "returned err = %d", err);
	return err;
}
static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct rxe_pd *pd = to_rpd(ibpd);
	int err;

	err = rxe_cleanup(pd);
	if (err)
		rxe_err_pd(pd, "cleanup failed, err = %d", err);

	return 0;
}
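
/*
 * Address handle verbs. rxe_create_ah() supports both the newer user
 * provider ABI, which returns the AH index through udata, and the older
 * ABI, which does not.
 */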
static int rxe_create_ah(struct ib_ah *ibah,
			 struct rdma_ah_init_attr *init_attr,
			 struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);
	struct rxe_create_ah_resp __user *uresp = NULL;
	int err, cleanup_err;

	if (udata) {
		/* test if new user provider */
		if (udata->outlen >= sizeof(*uresp))
			uresp = udata->outbuf;
		ah->is_user = true;
	} else {
		ah->is_user = false;
	}

	err = rxe_add_to_pool_ah(&rxe->ah_pool, ah,
			init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
	if (err) {
		rxe_dbg_dev(rxe, "unable to create ah");
		goto err_out;
	}

	/* create index > 0 */
	ah->ah_num = ah->elem.index;

	err = rxe_ah_chk_attr(ah, init_attr->ah_attr);
	if (err) {
		rxe_dbg_ah(ah, "bad attr");
		goto err_cleanup;
	}

	if (uresp) {
		/* only if new user provider */
		err = copy_to_user(&uresp->ah_num, &ah->ah_num,
				   sizeof(uresp->ah_num));
		if (err) {
			err = -EFAULT;
			rxe_dbg_ah(ah, "unable to copy to user");
			goto err_cleanup;
		}
	} else if (ah->is_user) {
		/* only if old user provider */
		ah->ah_num = 0;
	}

	rxe_init_av(init_attr->ah_attr, &ah->av);
	rxe_finalize(ah);

	return 0;

err_cleanup:
	cleanup_err = rxe_cleanup(ah);
	if (cleanup_err)
		rxe_err_ah(ah, "cleanup failed, err = %d", cleanup_err);
err_out:
	rxe_err_ah(ah, "returned err = %d", err);
	return err;
}
static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
	struct rxe_ah *ah = to_rah(ibah);
	int err;

	err = rxe_ah_chk_attr(ah, attr);
	if (err) {
		rxe_dbg_ah(ah, "bad attr");
		goto err_out;
	}

	rxe_init_av(attr, &ah->av);

	return 0;

err_out:
	rxe_err_ah(ah, "returned err = %d", err);
	return err;
}
static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
	struct rxe_ah *ah = to_rah(ibah);

	memset(attr, 0, sizeof(*attr));
	attr->type = ibah->type;
	rxe_av_to_attr(&ah->av, attr);

	return 0;
}
static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
	struct rxe_ah *ah = to_rah(ibah);
	int err;

	err = rxe_cleanup_ah(ah, flags & RDMA_DESTROY_AH_SLEEPABLE);
	if (err)
		rxe_err_ah(ah, "cleanup failed, err = %d", err);

	return 0;
}
static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
			  struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(ibsrq->device);
	struct rxe_pd *pd = to_rpd(ibsrq->pd);
	struct rxe_srq *srq = to_rsrq(ibsrq);
	struct rxe_create_srq_resp __user *uresp = NULL;
	int err, cleanup_err;

	if (udata) {
		if (udata->outlen < sizeof(*uresp)) {
			err = -EINVAL;
			rxe_err_dev(rxe, "malformed udata");
			goto err_out;
		}
		uresp = udata->outbuf;
	}

	if (init->srq_type != IB_SRQT_BASIC) {
		err = -EOPNOTSUPP;
		rxe_dbg_dev(rxe, "srq type = %d, not supported",
				init->srq_type);
		goto err_out;
	}

	err = rxe_srq_chk_init(rxe, init);
	if (err) {
		rxe_dbg_dev(rxe, "invalid init attributes");
		goto err_out;
	}

	err = rxe_add_to_pool(&rxe->srq_pool, srq);
	if (err) {
		rxe_dbg_dev(rxe, "unable to create srq, err = %d", err);
		goto err_out;
	}

	rxe_get(pd);
	srq->pd = pd;

	err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
	if (err) {
		rxe_dbg_srq(srq, "create srq failed, err = %d", err);
		goto err_cleanup;
	}

	return 0;

err_cleanup:
	cleanup_err = rxe_cleanup(srq);
	if (cleanup_err)
		rxe_err_srq(srq, "cleanup failed, err = %d", cleanup_err);
err_out:
	rxe_err_dev(rxe, "returned err = %d", err);
	return err;
}
static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
			  enum ib_srq_attr_mask mask,
			  struct ib_udata *udata)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);
	struct rxe_dev *rxe = to_rdev(ibsrq->device);
	struct rxe_modify_srq_cmd cmd = {};
	int err;

	if (udata) {
		if (udata->inlen < sizeof(cmd)) {
			err = -EINVAL;
			rxe_dbg_srq(srq, "malformed udata");
			goto err_out;
		}

		err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
		if (err) {
			rxe_dbg_srq(srq, "unable to read udata");
			goto err_out;
		}
	}

	err = rxe_srq_chk_attr(rxe, srq, attr, mask);
	if (err) {
		rxe_dbg_srq(srq, "bad init attributes");
		goto err_out;
	}

	err = rxe_srq_from_attr(rxe, srq, attr, mask, &cmd, udata);
	if (err) {
		rxe_dbg_srq(srq, "bad attr");
		goto err_out;
	}

	return 0;

err_out:
	rxe_err_srq(srq, "returned err = %d", err);
	return err;
}
static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);
	int err;

	if (srq->error) {
		err = -EINVAL;
		rxe_dbg_srq(srq, "srq in error state");
		goto err_out;
	}

	attr->max_wr = srq->rq.queue->buf->index_mask;
	attr->max_sge = srq->rq.max_sge;
	attr->srq_limit = srq->limit;

	return 0;

err_out:
	rxe_err_srq(srq, "returned err = %d", err);
	return err;
}
static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
			     const struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct rxe_srq *srq = to_rsrq(ibsrq);
	unsigned long flags;

	spin_lock_irqsave(&srq->rq.producer_lock, flags);

	while (wr) {
		err = post_one_recv(&srq->rq, wr);
		if (unlikely(err))
			break;
		wr = wr->next;
	}

	spin_unlock_irqrestore(&srq->rq.producer_lock, flags);

	if (err) {
		*bad_wr = wr;
		rxe_err_srq(srq, "returned err = %d", err);
	}

	return err;
}
static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);
	int err;

	err = rxe_cleanup(srq);
	if (err)
		rxe_err_srq(srq, "cleanup failed, err = %d", err);

	return 0;
}
static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
			 struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_pd *pd = to_rpd(ibqp->pd);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_create_qp_resp __user *uresp = NULL;
	int err, cleanup_err;

	if (udata) {
		if (udata->inlen) {
			err = -EINVAL;
			rxe_dbg_dev(rxe, "malformed udata, err = %d", err);
			goto err_out;
		}

		if (udata->outlen < sizeof(*uresp)) {
			err = -EINVAL;
			rxe_dbg_dev(rxe, "malformed udata, err = %d", err);
			goto err_out;
		}

		qp->is_user = true;
		uresp = udata->outbuf;
	} else {
		qp->is_user = false;
	}

	if (init->create_flags) {
		err = -EOPNOTSUPP;
		rxe_dbg_dev(rxe, "unsupported create_flags, err = %d", err);
		goto err_out;
	}

	err = rxe_qp_chk_init(rxe, init);
	if (err) {
		rxe_dbg_dev(rxe, "bad init attr, err = %d", err);
		goto err_out;
	}

	err = rxe_add_to_pool(&rxe->qp_pool, qp);
	if (err) {
		rxe_dbg_dev(rxe, "unable to create qp, err = %d", err);
		goto err_out;
	}

	err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibqp->pd, udata);
	if (err) {
		rxe_dbg_qp(qp, "create qp failed, err = %d", err);
		goto err_cleanup;
	}

	rxe_finalize(qp);
	return 0;

err_cleanup:
	cleanup_err = rxe_cleanup(qp);
	if (cleanup_err)
		rxe_err_qp(qp, "cleanup failed, err = %d", cleanup_err);
err_out:
	rxe_err_dev(rxe, "returned err = %d", err);
	return err;
}
static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			 int mask, struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	int err;

	if (mask & ~IB_QP_ATTR_STANDARD_BITS) {
		err = -EOPNOTSUPP;
		rxe_dbg_qp(qp, "unsupported mask = 0x%x, err = %d",
			   mask, err);
		goto err_out;
	}

	err = rxe_qp_chk_attr(rxe, qp, attr, mask);
	if (err) {
		rxe_dbg_qp(qp, "bad mask/attr, err = %d", err);
		goto err_out;
	}

	err = rxe_qp_from_attr(qp, attr, mask, udata);
	if (err) {
		rxe_dbg_qp(qp, "modify qp failed, err = %d", err);
		goto err_out;
	}

	if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH))
		qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
						  qp->ibqp.qp_num,
						  qp->attr.dest_qp_num);

	return 0;

err_out:
	rxe_err_qp(qp, "returned err = %d", err);
	return err;
}
static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			int mask, struct ib_qp_init_attr *init)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_to_init(qp, init);
	rxe_qp_to_attr(qp, attr, mask);

	return 0;
}
static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
	struct rxe_qp *qp = to_rqp(ibqp);
	int err;

	err = rxe_qp_chk_destroy(qp);
	if (err) {
		rxe_dbg_qp(qp, "unable to destroy qp, err = %d", err);
		goto err_out;
	}

	err = rxe_cleanup(qp);
	if (err)
		rxe_err_qp(qp, "cleanup failed, err = %d", err);

	return 0;

err_out:
	rxe_err_qp(qp, "returned err = %d", err);
	return err;
}
/* sanity check incoming send work request */
static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			    unsigned int *maskp, unsigned int *lengthp)
{
	int num_sge = ibwr->num_sge;
	struct rxe_sq *sq = &qp->sq;
	unsigned int mask = 0;
	unsigned long length = 0;
	int err = -EINVAL;
	int i;

	do {
		mask = wr_opcode_mask(ibwr->opcode, qp);
		if (!mask) {
			rxe_err_qp(qp, "bad wr opcode for qp type");
			break;
		}

		if (num_sge > sq->max_sge) {
			rxe_err_qp(qp, "num_sge > max_sge");
			break;
		}

		length = 0;
		for (i = 0; i < ibwr->num_sge; i++)
			length += ibwr->sg_list[i].length;

		if (length > (1UL << 31)) {
			rxe_err_qp(qp, "message length too long");
			break;
		}

		if (mask & WR_ATOMIC_MASK) {
			if (length != 8) {
				rxe_err_qp(qp, "atomic length != 8");
				break;
			}
			if (atomic_wr(ibwr)->remote_addr & 0x7) {
				rxe_err_qp(qp, "misaligned atomic address");
				break;
			}
		}
		if (ibwr->send_flags & IB_SEND_INLINE) {
			if (!(mask & WR_INLINE_MASK)) {
				rxe_err_qp(qp, "opcode doesn't support inline data");
				break;
			}
			if (length > sq->max_inline) {
				rxe_err_qp(qp, "inline length too big");
				break;
			}
		}

		err = 0;
	} while (0);

	*maskp = mask;
	*lengthp = (int)length;

	return err;
}
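
/* copy the opcode-dependent fields of an ib_send_wr into the driver's
 * rxe_send_wr, with separate handling for UD/GSI and connected QP types
 */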
static int init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
			const struct ib_send_wr *ibwr)
{
	wr->wr_id = ibwr->wr_id;
	wr->opcode = ibwr->opcode;
	wr->send_flags = ibwr->send_flags;

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_GSI) {
		struct ib_ah *ibah = ud_wr(ibwr)->ah;

		wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
		wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
		wr->wr.ud.ah_num = to_rah(ibah)->ah_num;
		if (qp_type(qp) == IB_QPT_GSI)
			wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;

		switch (wr->opcode) {
		case IB_WR_SEND_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			break;
		case IB_WR_SEND:
			break;
		default:
			rxe_err_qp(qp, "bad wr opcode %d for UD/GSI QP",
					wr->opcode);
			return -EINVAL;
		}
	} else {
		switch (wr->opcode) {
		case IB_WR_RDMA_WRITE_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			fallthrough;
		case IB_WR_RDMA_READ:
		case IB_WR_RDMA_WRITE:
			wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
			wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;
			break;
		case IB_WR_SEND_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			break;
		case IB_WR_SEND_WITH_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_RDMA_READ_WITH_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
			wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;
			break;
		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			wr->wr.atomic.remote_addr =
				atomic_wr(ibwr)->remote_addr;
			wr->wr.atomic.compare_add =
				atomic_wr(ibwr)->compare_add;
			wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
			wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
			break;
		case IB_WR_LOCAL_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_REG_MR:
			wr->wr.reg.mr = reg_wr(ibwr)->mr;
			wr->wr.reg.key = reg_wr(ibwr)->key;
			wr->wr.reg.access = reg_wr(ibwr)->access;
			break;
		case IB_WR_SEND:
		case IB_WR_BIND_MW:
		case IB_WR_FLUSH:
		case IB_WR_ATOMIC_WRITE:
			break;
		default:
			rxe_err_qp(qp, "unsupported wr opcode %d",
					wr->opcode);
			return -EINVAL;
		}
	}

	return 0;
}
static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe,
				    const struct ib_send_wr *ibwr)
{
	struct ib_sge *sge = ibwr->sg_list;
	u8 *p = wqe->dma.inline_data;
	int i;

	for (i = 0; i < ibwr->num_sge; i++, sge++) {
		memcpy(p, ib_virt_dma_to_page(sge->addr), sge->length);
		p += sge->length;
	}
}
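
/* fill in a send WQE from a validated work request; local operations carry
 * no payload and are marked posted immediately
 */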
static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			 unsigned int mask, unsigned int length,
			 struct rxe_send_wqe *wqe)
{
	int num_sge = ibwr->num_sge;
	int err;

	err = init_send_wr(qp, &wqe->wr, ibwr);
	if (err)
		return err;

	/* local operation */
	if (unlikely(mask & WR_LOCAL_OP_MASK)) {
		wqe->mask = mask;
		wqe->state = wqe_state_posted;
		return 0;
	}

	if (unlikely(ibwr->send_flags & IB_SEND_INLINE))
		copy_inline_data_to_wqe(wqe, ibwr);
	else
		memcpy(wqe->dma.sge, ibwr->sg_list,
		       num_sge * sizeof(struct ib_sge));

	wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr :
		mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0;
	wqe->mask = mask;
	wqe->dma.length = length;
	wqe->dma.resid = length;
	wqe->dma.num_sge = num_sge;
	wqe->dma.cur_sge = 0;
	wqe->dma.sge_offset = 0;
	wqe->state = wqe_state_posted;
	wqe->ssn = atomic_add_return(1, &qp->ssn);

	return 0;
}
static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr)
{
	int err;
	struct rxe_sq *sq = &qp->sq;
	struct rxe_send_wqe *send_wqe;
	unsigned int mask;
	unsigned int length;
	int full;

	err = validate_send_wr(qp, ibwr, &mask, &length);
	if (err)
		return err;

	full = queue_full(sq->queue, QUEUE_TYPE_FROM_ULP);
	if (unlikely(full)) {
		rxe_err_qp(qp, "send queue full");
		return -ENOMEM;
	}

	send_wqe = queue_producer_addr(sq->queue, QUEUE_TYPE_FROM_ULP);
	err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
	if (!err)
		queue_advance_producer(sq->queue, QUEUE_TYPE_FROM_ULP);

	return err;
}
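
/* post a chain of send work requests from a kernel ULP and kick the
 * requester task; on error *bad_wr points at the first failed WR
 */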
static int rxe_post_send_kernel(struct rxe_qp *qp,
				const struct ib_send_wr *ibwr,
				const struct ib_send_wr **bad_wr)
{
	int err = 0;
	unsigned long flags;

	spin_lock_irqsave(&qp->sq.sq_lock, flags);
	while (ibwr) {
		err = post_one_send(qp, ibwr);
		if (err) {
			*bad_wr = ibwr;
			break;
		}
		ibwr = ibwr->next;
	}
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

	if (!err)
		rxe_sched_task(&qp->req.task);

	spin_lock_irqsave(&qp->state_lock, flags);
	if (qp_state(qp) == IB_QPS_ERR)
		rxe_sched_task(&qp->comp.task);
	spin_unlock_irqrestore(&qp->state_lock, flags);

	return err;
}
static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
			 const struct ib_send_wr **bad_wr)
{
	struct rxe_qp *qp = to_rqp(ibqp);
	int err;
	unsigned long flags;

	spin_lock_irqsave(&qp->state_lock, flags);
	/* caller has already called destroy_qp */
	if (WARN_ON_ONCE(!qp->valid)) {
		spin_unlock_irqrestore(&qp->state_lock, flags);
		rxe_err_qp(qp, "qp has been destroyed");
		return -EINVAL;
	}

	if (unlikely(qp_state(qp) < IB_QPS_RTS)) {
		spin_unlock_irqrestore(&qp->state_lock, flags);
		*bad_wr = wr;
		rxe_err_qp(qp, "qp not ready to send");
		return -EINVAL;
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);

	if (qp->is_user) {
		/* Utilize process context to do protocol processing */
		rxe_run_task(&qp->req.task);
	} else {
		err = rxe_post_send_kernel(qp, wr, bad_wr);
		if (err)
			return err;
	}

	return 0;
}
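
/* copy one receive work request into the receive queue; the caller must
 * hold the queue's producer_lock
 */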
static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
{
	int i, err;
	unsigned long length;
	struct rxe_recv_wqe *recv_wqe;
	int num_sge = ibwr->num_sge;
	int full;

	full = queue_full(rq->queue, QUEUE_TYPE_FROM_ULP);
	if (unlikely(full)) {
		err = -ENOMEM;
		rxe_dbg("queue full");
		goto err_out;
	}

	if (unlikely(num_sge > rq->max_sge)) {
		err = -EINVAL;
		rxe_dbg("bad num_sge > max_sge");
		goto err_out;
	}

	length = 0;
	for (i = 0; i < num_sge; i++)
		length += ibwr->sg_list[i].length;

	/* IBA max message size is 2^31 */
	if (length >= (1UL<<31)) {
		err = -EINVAL;
		rxe_dbg("message length too long");
		goto err_out;
	}

	recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_FROM_ULP);

	recv_wqe->wr_id = ibwr->wr_id;
	recv_wqe->dma.length = length;
	recv_wqe->dma.resid = length;
	recv_wqe->dma.num_sge = num_sge;
	recv_wqe->dma.cur_sge = 0;
	recv_wqe->dma.sge_offset = 0;
	memcpy(recv_wqe->dma.sge, ibwr->sg_list,
	       num_sge * sizeof(struct ib_sge));

	queue_advance_producer(rq->queue, QUEUE_TYPE_FROM_ULP);

	return 0;

err_out:
	rxe_dbg("returned err = %d", err);
	return err;
}
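
/* post receive work requests to a QP's own receive queue; QPs attached to
 * an SRQ must use rxe_post_srq_recv() instead
 */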
static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
			 const struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_rq *rq = &qp->rq;
	unsigned long flags;

	spin_lock_irqsave(&qp->state_lock, flags);
	/* caller has already called destroy_qp */
	if (WARN_ON_ONCE(!qp->valid)) {
		spin_unlock_irqrestore(&qp->state_lock, flags);
		rxe_err_qp(qp, "qp has been destroyed");
		return -EINVAL;
	}

	/* see C10-97.2.1 */
	if (unlikely((qp_state(qp) < IB_QPS_INIT))) {
		spin_unlock_irqrestore(&qp->state_lock, flags);
		*bad_wr = wr;
		rxe_dbg_qp(qp, "qp not ready to post recv");
		return -EINVAL;
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);

	if (unlikely(qp->srq)) {
		*bad_wr = wr;
		rxe_dbg_qp(qp, "qp has srq, use post_srq_recv instead");
		return -EINVAL;
	}

	spin_lock_irqsave(&rq->producer_lock, flags);

	while (wr) {
		err = post_one_recv(rq, wr);
		if (unlikely(err)) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	spin_unlock_irqrestore(&rq->producer_lock, flags);

	spin_lock_irqsave(&qp->state_lock, flags);
	if (qp_state(qp) == IB_QPS_ERR)
		rxe_sched_task(&qp->resp.task);
	spin_unlock_irqrestore(&qp->state_lock, flags);

	return err;
}
static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
			 struct ib_udata *udata)
{
	struct ib_device *dev = ibcq->device;
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_create_cq_resp __user *uresp = NULL;
	int err, cleanup_err;

	if (udata) {
		if (udata->outlen < sizeof(*uresp)) {
			err = -EINVAL;
			rxe_dbg_dev(rxe, "malformed udata, err = %d", err);
			goto err_out;
		}
		uresp = udata->outbuf;
	}

	if (attr->flags) {
		err = -EOPNOTSUPP;
		rxe_dbg_dev(rxe, "bad attr->flags, err = %d", err);
		goto err_out;
	}

	err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
	if (err) {
		rxe_dbg_dev(rxe, "bad init attributes, err = %d", err);
		goto err_out;
	}

	err = rxe_add_to_pool(&rxe->cq_pool, cq);
	if (err) {
		rxe_dbg_dev(rxe, "unable to create cq, err = %d", err);
		goto err_out;
	}

	err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, udata,
			       uresp);
	if (err) {
		rxe_dbg_cq(cq, "create cq failed, err = %d", err);
		goto err_cleanup;
	}

	return 0;

err_cleanup:
	cleanup_err = rxe_cleanup(cq);
	if (cleanup_err)
		rxe_err_cq(cq, "cleanup failed, err = %d", cleanup_err);
err_out:
	rxe_err_dev(rxe, "returned err = %d", err);
	return err;
}
static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_dev *rxe = to_rdev(ibcq->device);
	struct rxe_resize_cq_resp __user *uresp = NULL;
	int err;

	if (udata) {
		if (udata->outlen < sizeof(*uresp)) {
			err = -EINVAL;
			rxe_dbg_cq(cq, "malformed udata");
			goto err_out;
		}
		uresp = udata->outbuf;
	}

	err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
	if (err) {
		rxe_dbg_cq(cq, "bad attr, err = %d", err);
		goto err_out;
	}

	err = rxe_cq_resize_queue(cq, cqe, uresp, udata);
	if (err) {
		rxe_dbg_cq(cq, "resize cq failed, err = %d", err);
		goto err_out;
	}

	return 0;

err_out:
	rxe_err_cq(cq, "returned err = %d", err);
	return err;
}
static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	int i;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_cqe *cqe;
	unsigned long flags;

	spin_lock_irqsave(&cq->cq_lock, flags);
	for (i = 0; i < num_entries; i++) {
		cqe = queue_head(cq->queue, QUEUE_TYPE_TO_ULP);
		if (!cqe)
			break;	/* queue empty */

		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
		queue_advance_consumer(cq->queue, QUEUE_TYPE_TO_ULP);
	}
	spin_unlock_irqrestore(&cq->cq_lock, flags);

	return i;
}
static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	int count;

	count = queue_count(cq->queue, QUEUE_TYPE_TO_ULP);

	return (count > wc_cnt) ? wc_cnt : count;
}
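
/* arm the CQ for completion notification; returns 1 if missed events should
 * be reported and completions are already pending
 */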
static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	int ret = 0;
	int empty;
	unsigned long irq_flags;

	spin_lock_irqsave(&cq->cq_lock, irq_flags);
	cq->notify |= flags & IB_CQ_SOLICITED_MASK;
	empty = queue_empty(cq->queue, QUEUE_TYPE_TO_ULP);

	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty)
		ret = 1;

	spin_unlock_irqrestore(&cq->cq_lock, irq_flags);

	return ret;
}
static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	int err;

	/* See IBA C11-17: The CI shall return an error if this Verb is
	 * invoked while a Work Queue is still associated with the CQ.
	 */
	if (atomic_read(&cq->num_wq)) {
		err = -EINVAL;
		rxe_dbg_cq(cq, "still in use");
		goto err_out;
	}

	err = rxe_cleanup(cq);
	if (err)
		rxe_err_cq(cq, "cleanup failed, err = %d", err);

	return 0;

err_out:
	rxe_err_cq(cq, "returned err = %d", err);
	return err;
}
static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = rxe_add_to_pool(&rxe->mr_pool, mr);
	if (err) {
		rxe_dbg_dev(rxe, "unable to create mr");
		goto err_free;
	}

	rxe_get(pd);
	mr->ibmr.pd = ibpd;
	mr->ibmr.device = ibpd->device;

	rxe_mr_init_dma(access, mr);
	rxe_finalize(mr);

	return &mr->ibmr;

err_free:
	kfree(mr);
	rxe_err_pd(pd, "returned err = %d", err);
	return ERR_PTR(err);
}
static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start,
				     u64 length, u64 iova, int access,
				     struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mr *mr;
	int err, cleanup_err;

	if (access & ~RXE_ACCESS_SUPPORTED_MR) {
		rxe_err_pd(pd, "access = %#x not supported (%#x)", access,
				RXE_ACCESS_SUPPORTED_MR);
		return ERR_PTR(-EOPNOTSUPP);
	}

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = rxe_add_to_pool(&rxe->mr_pool, mr);
	if (err) {
		rxe_dbg_pd(pd, "unable to create mr");
		goto err_free;
	}

	rxe_get(pd);
	mr->ibmr.pd = ibpd;
	mr->ibmr.device = ibpd->device;

	err = rxe_mr_init_user(rxe, start, length, iova, access, mr);
	if (err) {
		rxe_dbg_mr(mr, "reg_user_mr failed, err = %d", err);
		goto err_cleanup;
	}

	rxe_finalize(mr);

	return &mr->ibmr;

err_cleanup:
	cleanup_err = rxe_cleanup(mr);
	if (cleanup_err)
		rxe_err_mr(mr, "cleanup failed, err = %d", cleanup_err);
err_free:
	kfree(mr);
	rxe_err_pd(pd, "returned err = %d", err);
	return ERR_PTR(err);
}
static struct ib_mr *rxe_rereg_user_mr(struct ib_mr *ibmr, int flags,
				       u64 start, u64 length, u64 iova,
				       int access, struct ib_pd *ibpd,
				       struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);
	struct rxe_pd *old_pd = to_rpd(ibmr->pd);
	struct rxe_pd *pd = to_rpd(ibpd);

	/* for now only support the two easy cases:
	 * rereg_pd and rereg_access
	 */
	if (flags & ~RXE_MR_REREG_SUPPORTED) {
		rxe_err_mr(mr, "flags = %#x not supported", flags);
		return ERR_PTR(-EOPNOTSUPP);
	}

	if (flags & IB_MR_REREG_PD) {
		rxe_put(old_pd);
		rxe_get(pd);
		mr->ibmr.pd = ibpd;
	}

	if (flags & IB_MR_REREG_ACCESS) {
		if (access & ~RXE_ACCESS_SUPPORTED_MR) {
			rxe_err_mr(mr, "access = %#x not supported", access);
			return ERR_PTR(-EOPNOTSUPP);
		}
		mr->access = access;
	}

	return NULL;
}
static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
				  u32 max_num_sg)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mr *mr;
	int err, cleanup_err;

	if (mr_type != IB_MR_TYPE_MEM_REG) {
		err = -EINVAL;
		rxe_dbg_pd(pd, "mr type %d not supported, err = %d",
			   mr_type, err);
		goto err_out;
	}

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = rxe_add_to_pool(&rxe->mr_pool, mr);
	if (err)
		goto err_free;

	rxe_get(pd);
	mr->ibmr.pd = ibpd;
	mr->ibmr.device = ibpd->device;

	err = rxe_mr_init_fast(max_num_sg, mr);
	if (err) {
		rxe_dbg_mr(mr, "alloc_mr failed, err = %d", err);
		goto err_cleanup;
	}

	rxe_finalize(mr);

	return &mr->ibmr;

err_cleanup:
	cleanup_err = rxe_cleanup(mr);
	if (cleanup_err)
		rxe_err_mr(mr, "cleanup failed, err = %d", err);
err_free:
	kfree(mr);
err_out:
	rxe_err_pd(pd, "returned err = %d", err);
	return ERR_PTR(err);
}
static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);
	int err, cleanup_err;

	/* See IBA 10.6.7.2.6 */
	if (atomic_read(&mr->num_mw) > 0) {
		err = -EINVAL;
		rxe_dbg_mr(mr, "mr has mw's bound");
		goto err_out;
	}

	cleanup_err = rxe_cleanup(mr);
	if (cleanup_err)
		rxe_err_mr(mr, "cleanup failed, err = %d", cleanup_err);

	kfree_rcu_mightsleep(mr);
	return 0;

err_out:
	rxe_err_mr(mr, "returned err = %d", err);
	return err;
}
static ssize_t parent_show(struct device *device,
			   struct device_attribute *attr, char *buf)
{
	struct rxe_dev *rxe =
		rdma_device_to_drv_device(device, struct rxe_dev, ib_dev);

	return sysfs_emit(buf, "%s\n", rxe_parent_name(rxe, 1));
}

static DEVICE_ATTR_RO(parent);
static struct attribute *rxe_dev_attributes[] = {
	&dev_attr_parent.attr,
	NULL
};

static const struct attribute_group rxe_attr_group = {
	.attrs	= rxe_dev_attributes,
};
static int rxe_enable_driver(struct ib_device *ib_dev)
{
	struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);

	rxe_set_port_state(rxe);
	dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev));

	return 0;
}
static const struct ib_device_ops rxe_dev_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_RXE,
	.uverbs_abi_ver = RXE_UVERBS_ABI_VERSION,

	.alloc_hw_port_stats = rxe_ib_alloc_hw_port_stats,
	.alloc_mr = rxe_alloc_mr,
	.alloc_mw = rxe_alloc_mw,
	.alloc_pd = rxe_alloc_pd,
	.alloc_ucontext = rxe_alloc_ucontext,
	.attach_mcast = rxe_attach_mcast,
	.create_ah = rxe_create_ah,
	.create_cq = rxe_create_cq,
	.create_qp = rxe_create_qp,
	.create_srq = rxe_create_srq,
	.create_user_ah = rxe_create_ah,
	.dealloc_driver = rxe_dealloc,
	.dealloc_mw = rxe_dealloc_mw,
	.dealloc_pd = rxe_dealloc_pd,
	.dealloc_ucontext = rxe_dealloc_ucontext,
	.dereg_mr = rxe_dereg_mr,
	.destroy_ah = rxe_destroy_ah,
	.destroy_cq = rxe_destroy_cq,
	.destroy_qp = rxe_destroy_qp,
	.destroy_srq = rxe_destroy_srq,
	.detach_mcast = rxe_detach_mcast,
	.device_group = &rxe_attr_group,
	.enable_driver = rxe_enable_driver,
	.get_dma_mr = rxe_get_dma_mr,
	.get_hw_stats = rxe_ib_get_hw_stats,
	.get_link_layer = rxe_get_link_layer,
	.get_port_immutable = rxe_port_immutable,
	.map_mr_sg = rxe_map_mr_sg,
	.mmap = rxe_mmap,
	.modify_ah = rxe_modify_ah,
	.modify_device = rxe_modify_device,
	.modify_port = rxe_modify_port,
	.modify_qp = rxe_modify_qp,
	.modify_srq = rxe_modify_srq,
	.peek_cq = rxe_peek_cq,
	.poll_cq = rxe_poll_cq,
	.post_recv = rxe_post_recv,
	.post_send = rxe_post_send,
	.post_srq_recv = rxe_post_srq_recv,
	.query_ah = rxe_query_ah,
	.query_device = rxe_query_device,
	.query_pkey = rxe_query_pkey,
	.query_port = rxe_query_port,
	.query_qp = rxe_query_qp,
	.query_srq = rxe_query_srq,
	.reg_user_mr = rxe_reg_user_mr,
	.req_notify_cq = rxe_req_notify_cq,
	.rereg_user_mr = rxe_rereg_user_mr,
	.resize_cq = rxe_resize_cq,

	INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
	INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq),
	INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd),
	INIT_RDMA_OBJ_SIZE(ib_qp, rxe_qp, ibqp),
	INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq),
	INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc),
	INIT_RDMA_OBJ_SIZE(ib_mw, rxe_mw, ibmw),
};
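
/* set up the ib_device fields, bind the netdev and register the device
 * with the RDMA core under the requested name
 */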
int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
{
	int err;
	struct ib_device *dev = &rxe->ib_dev;

	strscpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = num_possible_cpus();
	dev->local_dma_lkey = 0;
	addrconf_addr_eui48((unsigned char *)&dev->node_guid,
			    rxe->ndev->dev_addr);

	dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) |
				BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ);

	ib_set_device_ops(dev, &rxe_dev_ops);
	err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1);
	if (err)
		return err;

	err = rxe_icrc_init(rxe);
	if (err)
		return err;

	err = ib_register_device(dev, ibdev_name, NULL);
	if (err)
		rxe_dbg_dev(rxe, "failed with error %d\n", err);

	/*
	 * Note that rxe may be invalid at this point if another thread
	 * unregistered it.
	 */
	return err;
}