1 // SPDX-License-Identifier: GPL-2.0
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
5 * CLC (connection layer control) handshake over initial TCP socket to
6 * prepare for RDMA traffic
8 * Copyright IBM Corp. 2016
10 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
14 #include <linux/if_ether.h>
15 #include <linux/sched/signal.h>
25 /* Wait for data on the tcp-socket, analyze received data
27 * 0 if success and it was not a decline that we received.
28 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
29 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
31 int smc_clc_wait_msg(struct smc_sock
*smc
, void *buf
, int buflen
,
34 struct sock
*clc_sk
= smc
->clcsock
->sk
;
35 struct smc_clc_msg_hdr
*clcm
= buf
;
36 struct msghdr msg
= {NULL
, 0};
42 /* peek the first few bytes to determine length of data to receive
43 * so we don't consume any subsequent CLC message or payload data
44 * in the TCP byte stream
48 krflags
= MSG_PEEK
| MSG_WAITALL
;
49 smc
->clcsock
->sk
->sk_rcvtimeo
= CLC_WAIT_TIME
;
50 len
= kernel_recvmsg(smc
->clcsock
, &msg
, &vec
, 1,
51 sizeof(struct smc_clc_msg_hdr
), krflags
);
52 if (signal_pending(current
)) {
54 clc_sk
->sk_err
= EINTR
;
55 smc
->sk
.sk_err
= EINTR
;
59 reason_code
= -clc_sk
->sk_err
;
60 smc
->sk
.sk_err
= clc_sk
->sk_err
;
63 if (!len
) { /* peer has performed orderly shutdown */
64 smc
->sk
.sk_err
= ECONNRESET
;
65 reason_code
= -ECONNRESET
;
69 smc
->sk
.sk_err
= -len
;
73 datlen
= ntohs(clcm
->length
);
74 if ((len
< sizeof(struct smc_clc_msg_hdr
)) ||
75 (datlen
< sizeof(struct smc_clc_msg_decline
)) ||
76 (datlen
> sizeof(struct smc_clc_msg_accept_confirm
)) ||
77 memcmp(clcm
->eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
)) ||
78 ((clcm
->type
!= SMC_CLC_DECLINE
) &&
79 (clcm
->type
!= expected_type
))) {
80 smc
->sk
.sk_err
= EPROTO
;
81 reason_code
= -EPROTO
;
85 /* receive the complete CLC message */
88 memset(&msg
, 0, sizeof(struct msghdr
));
89 krflags
= MSG_WAITALL
;
90 smc
->clcsock
->sk
->sk_rcvtimeo
= CLC_WAIT_TIME
;
91 len
= kernel_recvmsg(smc
->clcsock
, &msg
, &vec
, 1, datlen
, krflags
);
93 smc
->sk
.sk_err
= EPROTO
;
94 reason_code
= -EPROTO
;
97 if (clcm
->type
== SMC_CLC_DECLINE
) {
98 reason_code
= SMC_CLC_DECL_REPLY
;
99 if (((struct smc_clc_msg_decline
*)buf
)->hdr
.flag
) {
100 smc
->conn
.lgr
->sync_err
= true;
101 smc_lgr_terminate(smc
->conn
.lgr
);
109 /* send CLC DECLINE message across internal TCP socket */
110 int smc_clc_send_decline(struct smc_sock
*smc
, u32 peer_diag_info
)
112 struct smc_clc_msg_decline dclc
;
117 memset(&dclc
, 0, sizeof(dclc
));
118 memcpy(dclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
119 dclc
.hdr
.type
= SMC_CLC_DECLINE
;
120 dclc
.hdr
.length
= htons(sizeof(struct smc_clc_msg_decline
));
121 dclc
.hdr
.version
= SMC_CLC_V1
;
122 dclc
.hdr
.flag
= (peer_diag_info
== SMC_CLC_DECL_SYNCERR
) ? 1 : 0;
123 memcpy(dclc
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
124 dclc
.peer_diagnosis
= htonl(peer_diag_info
);
125 memcpy(dclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
127 memset(&msg
, 0, sizeof(msg
));
128 vec
.iov_base
= &dclc
;
129 vec
.iov_len
= sizeof(struct smc_clc_msg_decline
);
130 len
= kernel_sendmsg(smc
->clcsock
, &msg
, &vec
, 1,
131 sizeof(struct smc_clc_msg_decline
));
132 if (len
< sizeof(struct smc_clc_msg_decline
))
133 smc
->sk
.sk_err
= EPROTO
;
135 smc
->sk
.sk_err
= -len
;
139 /* send CLC PROPOSAL message across internal TCP socket */
140 int smc_clc_send_proposal(struct smc_sock
*smc
,
141 struct smc_ib_device
*smcibdev
,
144 struct smc_clc_msg_proposal pclc
;
150 /* send SMC Proposal CLC message */
151 memset(&pclc
, 0, sizeof(pclc
));
152 memcpy(pclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
153 pclc
.hdr
.type
= SMC_CLC_PROPOSAL
;
154 pclc
.hdr
.length
= htons(sizeof(pclc
));
155 pclc
.hdr
.version
= SMC_CLC_V1
; /* SMC version */
156 memcpy(pclc
.lcl
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
157 memcpy(&pclc
.lcl
.gid
, &smcibdev
->gid
[ibport
- 1], SMC_GID_SIZE
);
158 memcpy(&pclc
.lcl
.mac
, &smcibdev
->mac
[ibport
- 1], ETH_ALEN
);
160 /* determine subnet and mask from internal TCP socket */
161 rc
= smc_netinfo_by_tcpsk(smc
->clcsock
, &pclc
.outgoing_subnet
,
164 return SMC_CLC_DECL_CNFERR
; /* configuration error */
165 memcpy(pclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
166 memset(&msg
, 0, sizeof(msg
));
167 vec
.iov_base
= &pclc
;
168 vec
.iov_len
= sizeof(pclc
);
169 /* due to the few bytes needed for clc-handshake this cannot block */
170 len
= kernel_sendmsg(smc
->clcsock
, &msg
, &vec
, 1, sizeof(pclc
));
171 if (len
< sizeof(pclc
)) {
173 reason_code
= -ENETUNREACH
;
174 smc
->sk
.sk_err
= -reason_code
;
176 smc
->sk
.sk_err
= smc
->clcsock
->sk
->sk_err
;
177 reason_code
= -smc
->sk
.sk_err
;
184 /* send CLC CONFIRM message across internal TCP socket */
185 int smc_clc_send_confirm(struct smc_sock
*smc
)
187 struct smc_connection
*conn
= &smc
->conn
;
188 struct smc_clc_msg_accept_confirm cclc
;
189 struct smc_link
*link
;
195 link
= &conn
->lgr
->lnk
[SMC_SINGLE_LINK
];
196 /* send SMC Confirm CLC msg */
197 memset(&cclc
, 0, sizeof(cclc
));
198 memcpy(cclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
199 cclc
.hdr
.type
= SMC_CLC_CONFIRM
;
200 cclc
.hdr
.length
= htons(sizeof(cclc
));
201 cclc
.hdr
.version
= SMC_CLC_V1
; /* SMC version */
202 memcpy(cclc
.lcl
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
203 memcpy(&cclc
.lcl
.gid
, &link
->smcibdev
->gid
[link
->ibport
- 1],
205 memcpy(&cclc
.lcl
.mac
, &link
->smcibdev
->mac
[link
->ibport
- 1], ETH_ALEN
);
206 hton24(cclc
.qpn
, link
->roce_qp
->qp_num
);
208 htonl(conn
->rmb_desc
->mr_rx
[SMC_SINGLE_LINK
]->rkey
);
209 cclc
.conn_idx
= 1; /* for now: 1 RMB = 1 RMBE */
210 cclc
.rmbe_alert_token
= htonl(conn
->alert_token_local
);
211 cclc
.qp_mtu
= min(link
->path_mtu
, link
->peer_mtu
);
212 cclc
.rmbe_size
= conn
->rmbe_size_short
;
213 cclc
.rmb_dma_addr
= cpu_to_be64(
214 (u64
)sg_dma_address(conn
->rmb_desc
->sgt
[SMC_SINGLE_LINK
].sgl
));
215 hton24(cclc
.psn
, link
->psn_initial
);
217 memcpy(cclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
219 memset(&msg
, 0, sizeof(msg
));
220 vec
.iov_base
= &cclc
;
221 vec
.iov_len
= sizeof(cclc
);
222 len
= kernel_sendmsg(smc
->clcsock
, &msg
, &vec
, 1, sizeof(cclc
));
223 if (len
< sizeof(cclc
)) {
225 reason_code
= -ENETUNREACH
;
226 smc
->sk
.sk_err
= -reason_code
;
228 smc
->sk
.sk_err
= smc
->clcsock
->sk
->sk_err
;
229 reason_code
= -smc
->sk
.sk_err
;
235 /* send CLC ACCEPT message across internal TCP socket */
236 int smc_clc_send_accept(struct smc_sock
*new_smc
, int srv_first_contact
)
238 struct smc_connection
*conn
= &new_smc
->conn
;
239 struct smc_clc_msg_accept_confirm aclc
;
240 struct smc_link
*link
;
246 link
= &conn
->lgr
->lnk
[SMC_SINGLE_LINK
];
247 memset(&aclc
, 0, sizeof(aclc
));
248 memcpy(aclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
249 aclc
.hdr
.type
= SMC_CLC_ACCEPT
;
250 aclc
.hdr
.length
= htons(sizeof(aclc
));
251 aclc
.hdr
.version
= SMC_CLC_V1
; /* SMC version */
252 if (srv_first_contact
)
254 memcpy(aclc
.lcl
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
255 memcpy(&aclc
.lcl
.gid
, &link
->smcibdev
->gid
[link
->ibport
- 1],
257 memcpy(&aclc
.lcl
.mac
, link
->smcibdev
->mac
[link
->ibport
- 1], ETH_ALEN
);
258 hton24(aclc
.qpn
, link
->roce_qp
->qp_num
);
260 htonl(conn
->rmb_desc
->mr_rx
[SMC_SINGLE_LINK
]->rkey
);
261 aclc
.conn_idx
= 1; /* as long as 1 RMB = 1 RMBE */
262 aclc
.rmbe_alert_token
= htonl(conn
->alert_token_local
);
263 aclc
.qp_mtu
= link
->path_mtu
;
264 aclc
.rmbe_size
= conn
->rmbe_size_short
,
265 aclc
.rmb_dma_addr
= cpu_to_be64(
266 (u64
)sg_dma_address(conn
->rmb_desc
->sgt
[SMC_SINGLE_LINK
].sgl
));
267 hton24(aclc
.psn
, link
->psn_initial
);
268 memcpy(aclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
270 memset(&msg
, 0, sizeof(msg
));
271 vec
.iov_base
= &aclc
;
272 vec
.iov_len
= sizeof(aclc
);
273 len
= kernel_sendmsg(new_smc
->clcsock
, &msg
, &vec
, 1, sizeof(aclc
));
274 if (len
< sizeof(aclc
)) {
276 new_smc
->sk
.sk_err
= EPROTO
;
278 new_smc
->sk
.sk_err
= new_smc
->clcsock
->sk
->sk_err
;
279 rc
= sock_error(&new_smc
->sk
);