// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Basic Transport Functions exploiting Infiniband API
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */
#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/smc.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"
#include "smc_netlink.h"
#include "smc_stats.h"
#include "smc_tracepoint.h"
#define SMC_LGR_NUM_INCR		256
#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)

struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.num = 0,
};

static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);

static void smc_link_down_work(struct work_struct *work);
/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
						  spinlock_t **lgr_lock)
{
	if (lgr->is_smcd) {
		*lgr_lock = &lgr->smcd->lgr_lock;
		return &lgr->smcd->lgr_list;
	}

	*lgr_lock = &smc_lgr_list.lock;
	return &smc_lgr_list.list;
}
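
/* The two helpers below only keep a per-IB-port count of links in use;
 * these are diagnostic counters (exposed e.g. through the smc netlink
 * device dump) and are not used for locking or lifetime decisions.
 */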
static void smc_ibdev_cnt_inc(struct smc_link *lnk)
{
	atomic_inc(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
}

static void smc_ibdev_cnt_dec(struct smc_link *lnk)
{
	atomic_dec(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
}
static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	if (!lgr->freeing) {
		mod_delayed_work(system_wq, &lgr->free_work,
				 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
						SMC_LGR_FREE_DELAY_CLNT :
						SMC_LGR_FREE_DELAY_SERV);
	}
}
/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @smc		connection to register
 * Returns 0 on success, != otherwise.
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}
/* assign an SMC-R link to the connection */
static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
{
	enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
				       SMC_LNK_ACTIVE;
	int i, j;

	/* do link balancing */
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &conn->lgr->lnk[i];

		if (lnk->state != expected || lnk->link_is_asym)
			continue;
		if (conn->lgr->role == SMC_CLNT) {
			conn->lnk = lnk; /* temporary, SMC server assigns link*/
			break;
		}
		if (conn->lgr->conns_num % 2) {
			for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
				struct smc_link *lnk2;

				lnk2 = &conn->lgr->lnk[j];
				if (lnk2->state == expected &&
				    !lnk2->link_is_asym) {
					lnk = lnk2;
					break;
				}
			}
		}
		conn->lnk = lnk;
		break;
	}
	if (!conn->lnk)
		return SMC_CLC_DECL_NOACTLINK;
	atomic_inc(&conn->lnk->conn_cnt);
	return 0;
}
/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);
	int rc;

	if (!conn->lgr->is_smcd) {
		rc = smcr_lgr_conn_assign_link(conn, first);
		if (rc)
			return rc;
	}
	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);
	conn->lgr->conns_num++;
	return 0;
}
/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	if (!lgr->is_smcd)
		atomic_dec(&conn->lnk->conn_cnt);
	lgr->conns_num--;
	conn->alert_token_local = 0;
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}
/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!smc_conn_lgr_valid(conn))
		return;
	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local) {
		__smc_lgr_unregister_conn(conn);
	}
	write_unlock_bh(&lgr->conns_lock);
}
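
/* The smc_nl_* helpers below implement the generic netlink dump
 * interface: each one opens a SMC_NETLINK_* message, fills nested
 * attributes describing system, link group or link state, and cancels
 * the partially built message when any nla_put fails.
 */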
int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
	char hostname[SMC_MAX_HOSTNAME_LEN + 1];
	char smc_seid[SMC_MAX_EID_LEN + 1];
	struct nlattr *attrs;
	u8 *seid = NULL;
	u8 *host = NULL;
	void *nlh;

	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &smc_gen_nl_family, NLM_F_MULTI,
			  SMC_NETLINK_GET_SYS_INFO);
	if (!nlh)
		goto errmsg;
	if (cb_ctx->pos[0])
		goto errout;
	attrs = nla_nest_start(skb, SMC_GEN_SYS_INFO);
	if (!attrs)
		goto errout;
	if (nla_put_u8(skb, SMC_NLA_SYS_VER, SMC_V2))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_SYS_REL, SMC_RELEASE))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable()))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_SYS_IS_SMCR_V2, true))
		goto errattr;
	smc_clc_get_hostname(&host);
	if (host) {
		memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN);
		hostname[SMC_MAX_HOSTNAME_LEN] = 0;
		if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname))
			goto errattr;
	}
	if (smc_ism_is_v2_capable()) {
		smc_ism_get_system_eid(&seid);
		memcpy(smc_seid, seid, SMC_MAX_EID_LEN);
		smc_seid[SMC_MAX_EID_LEN] = 0;
		if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid))
			goto errattr;
	}
	nla_nest_end(skb, attrs);
	genlmsg_end(skb, nlh);
	cb_ctx->pos[0] = 1;
	return skb->len;

errattr:
	nla_nest_cancel(skb, attrs);
errmsg:
	genlmsg_cancel(skb, nlh);
errout:
	return skb->len;
}
/* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */
static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr,
				     struct sk_buff *skb,
				     struct netlink_callback *cb,
				     struct nlattr *v2_attrs)
{
	char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
	char smc_eid[SMC_MAX_EID_LEN + 1];

	if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
		goto errv2attr;
	if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
		goto errv2attr;
	if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
		goto errv2attr;
	memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
	smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
	if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
		goto errv2attr;
	memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
	smc_eid[SMC_MAX_EID_LEN] = 0;
	if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
		goto errv2attr;

	nla_nest_end(skb, v2_attrs);
	return 0;

errv2attr:
	nla_nest_cancel(skb, v2_attrs);
	return -EMSGSIZE;
}
static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
				   struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	struct nlattr *v2_attrs;

	v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2);
	if (!v2_attrs)
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway))
		goto errv2attr;

	nla_nest_end(skb, v2_attrs);
	return 0;

errv2attr:
	nla_nest_cancel(skb, v2_attrs);
errattr:
	return -EMSGSIZE;
}
static int smc_nl_fill_lgr(struct smc_link_group *lgr,
			   struct sk_buff *skb,
			   struct netlink_callback *cb)
{
	char smc_target[SMC_MAX_PNETID_LEN + 1];
	struct nlattr *attrs, *v2_attrs;

	attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR);
	if (!attrs)
		goto errout;

	if (nla_put_u32(skb, SMC_NLA_LGR_R_ID, *((u32 *)&lgr->id)))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LGR_R_CONNS_NUM, lgr->conns_num))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LGR_R_ROLE, lgr->role))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LGR_R_TYPE, lgr->type))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id))
		goto errattr;
	if (nla_put_u64_64bit(skb, SMC_NLA_LGR_R_NET_COOKIE,
			      lgr->net->net_cookie, SMC_NLA_LGR_R_PAD))
		goto errattr;
	memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN);
	smc_target[SMC_MAX_PNETID_LEN] = 0;
	if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
		goto errattr;
	if (lgr->smc_version > SMC_V1) {
		v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
		if (!v2_attrs)
			goto errattr;
		if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
			goto errattr;
		if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb))
			goto errattr;
	}

	nla_nest_end(skb, attrs);
	return 0;
errattr:
	nla_nest_cancel(skb, attrs);
errout:
	return -EMSGSIZE;
}
static int smc_nl_fill_lgr_link(struct smc_link_group *lgr,
				struct smc_link *link,
				struct sk_buff *skb,
				struct netlink_callback *cb)
{
	char smc_ibname[IB_DEVICE_NAME_MAX];
	u8 smc_gid_target[41];
	struct nlattr *attrs;
	u32 link_uid = 0;
	void *nlh;

	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &smc_gen_nl_family, NLM_F_MULTI,
			  SMC_NETLINK_GET_LINK_SMCR);
	if (!nlh)
		goto errmsg;
	attrs = nla_nest_start(skb, SMC_GEN_LINK_SMCR);
	if (!attrs)
		goto errout;

	if (nla_put_u8(skb, SMC_NLA_LINK_ID, link->link_id))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LINK_STATE, link->state))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LINK_CONN_CNT,
			atomic_read(&link->conn_cnt)))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LINK_IB_PORT, link->ibport))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LINK_NET_DEV, link->ndev_ifidx))
		goto errattr;
	snprintf(smc_ibname, sizeof(smc_ibname), "%s", link->ibname);
	if (nla_put_string(skb, SMC_NLA_LINK_IB_DEV, smc_ibname))
		goto errattr;
	memcpy(&link_uid, link->link_uid, sizeof(link_uid));
	if (nla_put_u32(skb, SMC_NLA_LINK_UID, link_uid))
		goto errattr;
	memcpy(&link_uid, link->peer_link_uid, sizeof(link_uid));
	if (nla_put_u32(skb, SMC_NLA_LINK_PEER_UID, link_uid))
		goto errattr;
	memset(smc_gid_target, 0, sizeof(smc_gid_target));
	smc_gid_be16_convert(smc_gid_target, link->gid);
	if (nla_put_string(skb, SMC_NLA_LINK_GID, smc_gid_target))
		goto errattr;
	memset(smc_gid_target, 0, sizeof(smc_gid_target));
	smc_gid_be16_convert(smc_gid_target, link->peer_gid);
	if (nla_put_string(skb, SMC_NLA_LINK_PEER_GID, smc_gid_target))
		goto errattr;

	nla_nest_end(skb, attrs);
	genlmsg_end(skb, nlh);
	return 0;
errattr:
	nla_nest_cancel(skb, attrs);
errmsg:
	genlmsg_cancel(skb, nlh);
errout:
	return -EMSGSIZE;
}
static int smc_nl_handle_lgr(struct smc_link_group *lgr,
			     struct sk_buff *skb,
			     struct netlink_callback *cb,
			     bool list_links)
{
	void *nlh;
	int i;

	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &smc_gen_nl_family, NLM_F_MULTI,
			  SMC_NETLINK_GET_LGR_SMCR);
	if (!nlh)
		goto errmsg;
	if (smc_nl_fill_lgr(lgr, skb, cb))
		goto errout;

	genlmsg_end(skb, nlh);
	if (!list_links)
		goto out;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&lgr->lnk[i]))
			continue;
		if (smc_nl_fill_lgr_link(lgr, &lgr->lnk[i], skb, cb))
			goto errout;
	}
out:
	return 0;

errout:
	genlmsg_cancel(skb, nlh);
errmsg:
	return -EMSGSIZE;
}
static void smc_nl_fill_lgr_list(struct smc_lgr_list *smc_lgr,
				 struct sk_buff *skb,
				 struct netlink_callback *cb,
				 bool list_links)
{
	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
	struct smc_link_group *lgr;
	int snum = cb_ctx->pos[0];
	int num = 0;

	spin_lock_bh(&smc_lgr->lock);
	list_for_each_entry(lgr, &smc_lgr->list, list) {
		if (num < snum)
			goto next;
		if (smc_nl_handle_lgr(lgr, skb, cb, list_links))
			goto errout;
next:
		num++;
	}
errout:
	spin_unlock_bh(&smc_lgr->lock);
	cb_ctx->pos[0] = num;
}
static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
				struct sk_buff *skb,
				struct netlink_callback *cb)
{
	char smc_pnet[SMC_MAX_PNETID_LEN + 1];
	struct nlattr *attrs;
	void *nlh;

	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &smc_gen_nl_family, NLM_F_MULTI,
			  SMC_NETLINK_GET_LGR_SMCD);
	if (!nlh)
		goto errmsg;
	attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCD);
	if (!attrs)
		goto errout;

	if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id)))
		goto errattr;
	if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID, lgr->smcd->local_gid,
			      SMC_NLA_LGR_D_PAD))
		goto errattr;
	if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid,
			      SMC_NLA_LGR_D_PAD))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd)))
		goto errattr;
	memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN);
	smc_pnet[SMC_MAX_PNETID_LEN] = 0;
	if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
		goto errattr;
	if (lgr->smc_version > SMC_V1) {
		struct nlattr *v2_attrs;

		v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_D_V2_COMMON);
		if (!v2_attrs)
			goto errattr;
		if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
			goto errattr;
	}
	nla_nest_end(skb, attrs);
	genlmsg_end(skb, nlh);
	return 0;

errattr:
	nla_nest_cancel(skb, attrs);
errmsg:
	genlmsg_cancel(skb, nlh);
errout:
	return -EMSGSIZE;
}
static int smc_nl_handle_smcd_lgr(struct smcd_dev *dev,
				  struct sk_buff *skb,
				  struct netlink_callback *cb)
{
	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
	struct smc_link_group *lgr;
	int snum = cb_ctx->pos[1];
	int rc = 0, num = 0;

	spin_lock_bh(&dev->lgr_lock);
	list_for_each_entry(lgr, &dev->lgr_list, list) {
		if (num < snum)
			goto next;
		rc = smc_nl_fill_smcd_lgr(lgr, skb, cb);
		if (rc)
			goto errout;
next:
		num++;
	}
errout:
	spin_unlock_bh(&dev->lgr_lock);
	cb_ctx->pos[1] = num;
	return rc;
}
static int smc_nl_fill_smcd_dev(struct smcd_dev_list *dev_list,
				struct sk_buff *skb,
				struct netlink_callback *cb)
{
	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
	struct smcd_dev *smcd_dev;
	int snum = cb_ctx->pos[0];
	int rc = 0, num = 0;

	mutex_lock(&dev_list->mutex);
	list_for_each_entry(smcd_dev, &dev_list->list, list) {
		if (list_empty(&smcd_dev->lgr_list))
			continue;
		if (num < snum)
			goto next;
		rc = smc_nl_handle_smcd_lgr(smcd_dev, skb, cb);
		if (rc)
			goto errout;
next:
		num++;
	}
errout:
	mutex_unlock(&dev_list->mutex);
	cb_ctx->pos[0] = num;
	return rc;
}
int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
{
	bool list_links = false;

	smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
	return skb->len;
}

int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb)
{
	bool list_links = true;

	smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
	return skb->len;
}

int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
{
	smc_nl_fill_smcd_dev(&smcd_dev_list, skb, cb);
	return skb->len;
}
void smc_lgr_cleanup_early(struct smc_link_group *lgr)
{
	spinlock_t *lgr_lock;

	if (!lgr)
		return;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	/* do not use this link group for new connections */
	if (!list_empty(&lgr->list))
		list_del_init(&lgr->list);
	spin_unlock_bh(lgr_lock);
	__smc_lgr_terminate(lgr, true);
}
static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (smc_link_sendable(lnk))
			lnk->state = SMC_LNK_INACTIVE;
	}
	wake_up_all(&lgr->llc_msg_waiter);
	wake_up_all(&lgr->llc_flow_waiter);
}
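
/* smc_lgr_free_work() runs from the delayed free_work scheduled in
 * smc_lgr_schedule_free_work(); it only frees the link group if it is
 * still unused (no connections were registered in the meantime) and no
 * other context has already started freeing it.
 */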
static void smc_lgr_free(struct smc_link_group *lgr);

static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	spinlock_t *lgr_lock;
	bool conns;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;
	}
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(lgr_lock);
		return;
	}
	list_del_init(&lgr->list); /* remove from smc_lgr_list */
	lgr->freeing = 1; /* this instance does the freeing, no new schedule */
	spin_unlock_bh(lgr_lock);
	cancel_delayed_work(&lgr->free_work);

	if (!lgr->is_smcd && !lgr->terminating)
		smc_llc_send_link_delete_all(lgr, true,
					     SMC_LLC_DEL_PROG_INIT_TERM);
	if (lgr->is_smcd && !lgr->terminating)
		smc_ism_signal_shutdown(lgr);
	if (!lgr->is_smcd)
		smcr_lgr_link_deactivate_all(lgr);
	smc_lgr_free(lgr);
}
static void smc_lgr_terminate_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  terminate_work);

	__smc_lgr_terminate(lgr, true);
}
/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
	u8 link_id;
	int i;

	while (1) {
again:
		link_id = ++lgr->next_link_id;
		if (!link_id)	/* skip zero as link_id */
			link_id = ++lgr->next_link_id;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (smc_link_usable(&lgr->lnk[i]) &&
			    lgr->lnk[i].link_id == link_id)
				goto again;
		}
		break;
	}
	return link_id;
}
static void smcr_copy_dev_info_to_link(struct smc_link *link)
{
	struct smc_ib_device *smcibdev = link->smcibdev;

	snprintf(link->ibname, sizeof(link->ibname), "%s",
		 smcibdev->ibdev->name);
	link->ndev_ifidx = smcibdev->ndev_ifidx[link->ibport - 1];
}
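
/* Initialize a new SMC-R link: the link takes a reference on its link
 * group and on the underlying IB device, allocates work request memory,
 * a protection domain and a queue pair, and enters ACTIVATING state.
 * On any failure the already acquired resources are released again in
 * reverse order.
 */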
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
		   u8 link_idx, struct smc_init_info *ini)
{
	struct smc_ib_device *smcibdev;
	u8 rndvec[3];
	int rc;

	if (lgr->smc_version == SMC_V2) {
		lnk->smcibdev = ini->smcrv2.ib_dev_v2;
		lnk->ibport = ini->smcrv2.ib_port_v2;
	} else {
		lnk->smcibdev = ini->ib_dev;
		lnk->ibport = ini->ib_port;
	}
	get_device(&lnk->smcibdev->ibdev->dev);
	atomic_inc(&lnk->smcibdev->lnk_cnt);
	refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */
	lnk->clearing = 0;
	lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
	lnk->link_id = smcr_next_link_id(lgr);
	lnk->lgr = lgr;
	smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
	lnk->link_idx = link_idx;
	smc_ibdev_cnt_inc(lnk);
	smcr_copy_dev_info_to_link(lnk);
	atomic_set(&lnk->conn_cnt, 0);
	smc_llc_link_set_uid(lnk);
	INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
	if (!lnk->smcibdev->initialized) {
		rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev);
		if (rc)
			goto out;
	}
	get_random_bytes(rndvec, sizeof(rndvec));
	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
		(rndvec[2] << 16);
	rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
				  ini->vlan_id, lnk->gid, &lnk->sgid_index,
				  lgr->smc_version == SMC_V2 ?
						  &ini->smcrv2 : NULL);
	if (rc)
		goto out;
	rc = smc_llc_link_init(lnk);
	if (rc)
		goto out;
	rc = smc_wr_alloc_link_mem(lnk);
	if (rc)
		goto clear_llc_lnk;
	rc = smc_ib_create_protection_domain(lnk);
	if (rc)
		goto free_link_mem;
	rc = smc_ib_create_queue_pair(lnk);
	if (rc)
		goto dealloc_pd;
	rc = smc_wr_create_link(lnk);
	if (rc)
		goto destroy_qp;
	lnk->state = SMC_LNK_ACTIVATING;
	return 0;

destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
clear_llc_lnk:
	smc_llc_link_clear(lnk, false);
out:
	smc_ibdev_cnt_dec(lnk);
	put_device(&lnk->smcibdev->ibdev->dev);
	smcibdev = lnk->smcibdev;
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&smcibdev->lnk_cnt))
		wake_up(&smcibdev->lnks_deleted);
	smc_lgr_put(lgr); /* lgr_hold above */
	return rc;
}
/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_link_group *lgr;
	struct list_head *lgr_list;
	struct smc_link *lnk;
	spinlock_t *lgr_lock;
	u8 link_idx;
	int rc = 0;
	int i;

	if (ini->is_smcd && ini->vlan_id) {
		if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected],
				     ini->vlan_id)) {
			rc = SMC_CLC_DECL_ISMVLANERR;
			goto out;
		}
	}

	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
		rc = SMC_CLC_DECL_MEM;
		goto ism_put_vlan;
	}
	lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", 0, 0,
				     SMC_LGR_ID_SIZE, &lgr->id);
	if (!lgr->tx_wq) {
		rc = -ENOMEM;
		goto free_lgr;
	}
	lgr->is_smcd = ini->is_smcd;
	lgr->sync_err = 0;
	lgr->terminating = 0;
	lgr->freeing = 0;
	lgr->vlan_id = ini->vlan_id;
	refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
	mutex_init(&lgr->sndbufs_lock);
	mutex_init(&lgr->rmbs_lock);
	rwlock_init(&lgr->conns_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
	lgr->next_link_id = 0;
	smc_lgr_list.num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
	lgr->conns_all = RB_ROOT;
	if (ini->is_smcd) {
		/* SMC-D specific settings */
		get_device(&ini->ism_dev[ini->ism_selected]->dev);
		lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected];
		lgr->smcd = ini->ism_dev[ini->ism_selected];
		lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
		lgr_lock = &lgr->smcd->lgr_lock;
		lgr->smc_version = ini->smcd_version;
		lgr->peer_shutdown = 0;
		atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
	} else {
		/* SMC-R specific settings */
		struct smc_ib_device *ibdev;
		int ibport;

		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
		lgr->smc_version = ini->smcr_version;
		memcpy(lgr->peer_systemid, ini->peer_systemid,
		       SMC_SYSTEMID_LEN);
		if (lgr->smc_version == SMC_V2) {
			ibdev = ini->smcrv2.ib_dev_v2;
			ibport = ini->smcrv2.ib_port_v2;
			lgr->saddr = ini->smcrv2.saddr;
			lgr->uses_gateway = ini->smcrv2.uses_gateway;
			memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
			       ETH_ALEN);
		} else {
			ibdev = ini->ib_dev;
			ibport = ini->ib_port;
		}
		memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
		       SMC_MAX_PNETID_LEN);
		if (smc_wr_alloc_lgr_mem(lgr))
			goto free_wq;
		smc_llc_lgr_init(lgr, smc);

		link_idx = SMC_SINGLE_LINK;
		lnk = &lgr->lnk[link_idx];
		rc = smcr_link_init(lgr, lnk, link_idx, ini);
		if (rc) {
			smc_wr_free_lgr_mem(lgr);
			goto free_wq;
		}
		lgr->net = smc_ib_net(lnk->smcibdev);
		lgr_list = &smc_lgr_list.list;
		lgr_lock = &smc_lgr_list.lock;
		atomic_inc(&lgr_cnt);
	}
	smc->conn.lgr = lgr;
	spin_lock_bh(lgr_lock);
	list_add_tail(&lgr->list, lgr_list);
	spin_unlock_bh(lgr_lock);
	return 0;

free_wq:
	destroy_workqueue(lgr->tx_wq);
free_lgr:
	kfree(lgr);
ism_put_vlan:
	if (ini->is_smcd && ini->vlan_id)
		smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id);
out:
	if (rc < 0) {
		if (rc == -ENOMEM)
			rc = SMC_CLC_DECL_MEM;
		else
			rc = SMC_CLC_DECL_INTERR;
	}
	return rc;
}
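
/* Compute how many bytes may still be written into the peer's RMB,
 * i.e. the receive buffer size minus the data between the consumer and
 * producer cursors that the peer has not yet consumed.
 */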
static int smc_write_space(struct smc_connection *conn)
{
	int buffer_len = conn->peer_rmbe_size;
	union smc_host_cursor prod;
	union smc_host_cursor cons;
	int space;

	smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
	/* determine rx_buf space */
	space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
	return space;
}
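
/* When a connection is moved to another link, the tx cursors are reset
 * to the last value confirmed by the peer so that any data that was in
 * flight on the old link is transmitted again on the new one.
 */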
static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
			     struct smc_wr_buf *wr_buf)
{
	struct smc_connection *conn = &smc->conn;
	union smc_host_cursor cons, fin;
	int rc = 0;
	int diff;

	smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
	smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
	/* set prod cursor to old state, enforce tx_rdma_writes() */
	smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);

	if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
		/* cons cursor advanced more than fin, and prod was set
		 * fin above, so now prod is smaller than cons. Fix that.
		 */
		diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_sent, diff);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_fin, diff);

		smp_mb__before_atomic();
		atomic_add(diff, &conn->sndbuf_space);
		smp_mb__after_atomic();

		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl.prod, diff);
		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl_fin, diff);
	}
	/* recalculate, value is used by tx_rdma_writes() */
	atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));

	if (smc->sk.sk_state != SMC_INIT &&
	    smc->sk.sk_state != SMC_CLOSED) {
		rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
		if (!rc) {
			queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 0);
			smc->sk.sk_data_ready(&smc->sk);
		}
	} else {
		smc_wr_tx_put_slot(conn->lnk,
				   (struct smc_wr_tx_pend_priv *)pend);
	}
	return rc;
}
void smc_switch_link_and_count(struct smc_connection *conn,
			       struct smc_link *to_lnk)
{
	atomic_dec(&conn->lnk->conn_cnt);
	/* link_hold in smc_conn_create() */
	smcr_link_put(conn->lnk);
	conn->lnk = to_lnk;
	atomic_inc(&conn->lnk->conn_cnt);
	/* link_put in smc_conn_free() */
	smcr_link_hold(conn->lnk);
}
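
/* Move all connections of a link group away from a failing link: pick
 * another active link, re-assign each connection to it and rewind its
 * cursors via smc_switch_cursor(). Returns the new link, or NULL if no
 * alternative link exists and the link group had to be terminated.
 */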
struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
				  struct smc_link *from_lnk, bool is_dev_err)
{
	struct smc_link *to_lnk = NULL;
	struct smc_cdc_tx_pend *pend;
	struct smc_connection *conn;
	struct smc_wr_buf *wr_buf;
	struct smc_sock *smc;
	struct rb_node *node;
	int i, rc = 0;

	/* link is inactive, wake up tx waiters */
	smc_wr_wakeup_tx_wait(from_lnk);

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx)
			continue;
		if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
		    from_lnk->ibport == lgr->lnk[i].ibport) {
			continue;
		}
		to_lnk = &lgr->lnk[i];
		break;
	}
	if (!to_lnk || !smc_wr_tx_link_hold(to_lnk)) {
		smc_lgr_terminate_sched(lgr);
		return NULL;
	}
again:
	read_lock_bh(&lgr->conns_lock);
	for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
		conn = rb_entry(node, struct smc_connection, alert_node);
		if (conn->lnk != from_lnk)
			continue;
		smc = container_of(conn, struct smc_sock, conn);
		/* conn->lnk not yet set in SMC_INIT state */
		if (smc->sk.sk_state == SMC_INIT)
			continue;
		if (smc->sk.sk_state == SMC_CLOSED ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_PEERABORTWAIT ||
		    smc->sk.sk_state == SMC_PROCESSABORT) {
			spin_lock_bh(&conn->send_lock);
			smc_switch_link_and_count(conn, to_lnk);
			spin_unlock_bh(&conn->send_lock);
			continue;
		}
		sock_hold(&smc->sk);
		read_unlock_bh(&lgr->conns_lock);
		/* pre-fetch buffer outside of send_lock, might sleep */
		rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
		if (rc)
			goto err_out;
		/* avoid race with smcr_tx_sndbuf_nonempty() */
		spin_lock_bh(&conn->send_lock);
		smc_switch_link_and_count(conn, to_lnk);
		rc = smc_switch_cursor(smc, pend, wr_buf);
		spin_unlock_bh(&conn->send_lock);
		sock_put(&smc->sk);
		if (rc)
			goto err_out;
		goto again;
	}
	read_unlock_bh(&lgr->conns_lock);
	smc_wr_tx_link_put(to_lnk);
	return to_lnk;

err_out:
	smcr_link_down_cond_sched(to_lnk);
	smc_wr_tx_link_put(to_lnk);
	return NULL;
}
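
/* Mark an RMB as no longer used by a connection. If its rkey is still
 * registered with the peer, a DELETE RKEY LLC flow removes it first;
 * buffers whose registration failed are freed instead of being reused.
 */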
static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
			   struct smc_link_group *lgr)
{
	int rc;

	if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
		/* unregister rmb with peer */
		rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
		if (!rc) {
			/* protect against smc_llc_cli_rkey_exchange() */
			mutex_lock(&lgr->llc_conf_mutex);
			smc_llc_do_delete_rkey(lgr, rmb_desc);
			rmb_desc->is_conf_rkey = false;
			mutex_unlock(&lgr->llc_conf_mutex);
			smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
		}
	}

	if (rmb_desc->is_reg_err) {
		/* buf registration failed, reuse not possible */
		mutex_lock(&lgr->rmbs_lock);
		list_del(&rmb_desc->list);
		mutex_unlock(&lgr->rmbs_lock);

		smc_buf_free(lgr, true, rmb_desc);
	} else {
		rmb_desc->used = 0;
		memset(rmb_desc->cpu_addr, 0, rmb_desc->len);
	}
}
static void smc_buf_unuse(struct smc_connection *conn,
			  struct smc_link_group *lgr)
{
	if (conn->sndbuf_desc) {
		conn->sndbuf_desc->used = 0;
		memset(conn->sndbuf_desc->cpu_addr, 0, conn->sndbuf_desc->len);
	}
	if (conn->rmb_desc && lgr->is_smcd) {
		conn->rmb_desc->used = 0;
		memset(conn->rmb_desc->cpu_addr, 0, conn->rmb_desc->len +
		       sizeof(struct smcd_cdc_msg));
	} else if (conn->rmb_desc) {
		smcr_buf_unuse(conn->rmb_desc, lgr);
	}
}
/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr || conn->freed)
		/* Connection has never been registered in a
		 * link group, or has already been freed.
		 */
		return;

	conn->freed = 1;
	if (!smc_conn_lgr_valid(conn))
		/* Connection has already unregistered from
		 * link group.
		 */
		goto lgr_put;

	if (lgr->is_smcd) {
		if (!list_empty(&lgr->list))
			smc_ism_unset_conn(conn);
		tasklet_kill(&conn->rx_tsklet);
	} else {
		smc_cdc_wait_pend_tx_wr(conn);
		if (current_work() != &conn->abort_work)
			cancel_work_sync(&conn->abort_work);
	}
	if (!list_empty(&lgr->list)) {
		smc_buf_unuse(conn, lgr); /* allow buffer reuse */
		smc_lgr_unregister_conn(conn);
	}

	if (!lgr->conns_num)
		smc_lgr_schedule_free_work(lgr);
lgr_put:
	if (!lgr->is_smcd)
		smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */
	smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */
}
/* unregister a link from a buf_desc */
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
				struct smc_link *lnk)
{
	if (is_rmb)
		buf_desc->is_reg_mr[lnk->link_idx] = false;
	if (!buf_desc->is_map_ib[lnk->link_idx])
		return;
	if (is_rmb) {
		if (buf_desc->mr_rx[lnk->link_idx]) {
			smc_ib_put_memory_region(
					buf_desc->mr_rx[lnk->link_idx]);
			buf_desc->mr_rx[lnk->link_idx] = NULL;
		}
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
	} else {
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
	}
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	buf_desc->is_map_ib[lnk->link_idx] = false;
}
/* unmap all buffers of lgr for a deleted link */
static void smcr_buf_unmap_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		mutex_lock(&lgr->rmbs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
			smcr_buf_unmap_link(buf_desc, true, lnk);
		mutex_unlock(&lgr->rmbs_lock);
		mutex_lock(&lgr->sndbufs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
					 list)
			smcr_buf_unmap_link(buf_desc, false, lnk);
		mutex_unlock(&lgr->sndbufs_lock);
	}
}
static void smcr_rtoken_clear_link(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		lgr->rtokens[i][lnk->link_idx].rkey = 0;
		lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
	}
}
static void __smcr_link_clear(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_ib_device *smcibdev;

	smc_wr_free_link_mem(lnk);
	smc_ibdev_cnt_dec(lnk);
	put_device(&lnk->smcibdev->ibdev->dev);
	smcibdev = lnk->smcibdev;
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&smcibdev->lnk_cnt))
		wake_up(&smcibdev->lnks_deleted);
	smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */
}
/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_clear(struct smc_link *lnk, bool log)
{
	if (!lnk->lgr || lnk->clearing ||
	    lnk->state == SMC_LNK_UNUSED)
		return;
	lnk->clearing = 1;
	lnk->peer_qpn = 0;
	smc_llc_link_clear(lnk, log);
	smcr_buf_unmap_lgr(lnk);
	smcr_rtoken_clear_link(lnk);
	smc_ib_modify_qp_error(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smcr_link_put(lnk); /* theoretically last link_put */
}
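
/* Link lifetime is reference counted: smcr_link_clear() drops the
 * initial reference, and the last smcr_link_put() triggers
 * __smcr_link_clear(), which releases the IB device and the link group
 * reference taken in smcr_link_init().
 */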
void smcr_link_hold(struct smc_link *lnk)
{
	refcount_inc(&lnk->refcnt);
}

void smcr_link_put(struct smc_link *lnk)
{
	if (refcount_dec_and_test(&lnk->refcnt))
		__smcr_link_clear(lnk);
}
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
			  struct smc_buf_desc *buf_desc)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);

	if (buf_desc->pages)
		__free_pages(buf_desc->pages, buf_desc->order);
	kfree(buf_desc);
}
static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
			  struct smc_buf_desc *buf_desc)
{
	if (is_dmb) {
		/* restore original buf len */
		buf_desc->len += sizeof(struct smcd_cdc_msg);
		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
	} else {
		kfree(buf_desc->cpu_addr);
	}
	kfree(buf_desc);
}
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc)
{
	if (lgr->is_smcd)
		smcd_buf_free(lgr, is_rmb, buf_desc);
	else
		smcr_buf_free(lgr, is_rmb, buf_desc);
}
static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf_desc;
	struct list_head *buf_list;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		if (is_rmb)
			buf_list = &lgr->rmbs[i];
		else
			buf_list = &lgr->sndbufs[i];
		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
					 list) {
			list_del(&buf_desc->list);
			smc_buf_free(lgr, is_rmb, buf_desc);
		}
	}
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	/* free send buffers */
	__smc_lgr_free_bufs(lgr, false);
	/* free rmbs */
	__smc_lgr_free_bufs(lgr, true);
}
/* won't be freed until no one accesses to lgr anymore */
static void __smc_lgr_free(struct smc_link_group *lgr)
{
	smc_lgr_free_bufs(lgr);
	if (lgr->is_smcd) {
		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
			wake_up(&lgr->smcd->lgrs_deleted);
	} else {
		smc_wr_free_lgr_mem(lgr);
		if (!atomic_dec_return(&lgr_cnt))
			wake_up(&lgrs_deleted);
	}
	kfree(lgr);
}
/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
	int i;

	if (!lgr->is_smcd) {
		mutex_lock(&lgr->llc_conf_mutex);
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
				smcr_link_clear(&lgr->lnk[i], false);
		}
		mutex_unlock(&lgr->llc_conf_mutex);
		smc_llc_lgr_clear(lgr);
	}

	destroy_workqueue(lgr->tx_wq);
	if (lgr->is_smcd) {
		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
		put_device(&lgr->smcd->dev);
	}
	smc_lgr_put(lgr); /* theoretically last lgr_put */
}
void smc_lgr_hold(struct smc_link_group *lgr)
{
	refcount_inc(&lgr->refcnt);
}

void smc_lgr_put(struct smc_link_group *lgr)
{
	if (refcount_dec_and_test(&lgr->refcnt))
		__smc_lgr_free(lgr);
}
static void smc_sk_wake_ups(struct smc_sock *smc)
{
	smc->sk.sk_write_space(&smc->sk);
	smc->sk.sk_data_ready(&smc->sk);
	smc->sk.sk_state_change(&smc->sk);
}
/* kill a connection */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		smc_close_abort(conn);
	conn->killed = 1;
	smc->sk.sk_err = ECONNABORTED;
	smc_sk_wake_ups(smc);
	if (conn->lgr->is_smcd) {
		smc_ism_unset_conn(conn);
		if (soft)
			tasklet_kill(&conn->rx_tsklet);
		else
			tasklet_unlock_wait(&conn->rx_tsklet);
	} else {
		smc_cdc_wait_pend_tx_wr(conn);
	}
	smc_lgr_unregister_conn(conn);
	smc_close_active_abort(smc);
}
static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
	if (lgr->is_smcd) {
		smc_ism_signal_shutdown(lgr);
	} else {
		u32 rsn = lgr->llc_termination_rsn;

		if (!rsn)
			rsn = SMC_LLC_DEL_PROG_INIT_TERM;
		smc_llc_send_link_delete_all(lgr, false, rsn);
		smcr_lgr_link_deactivate_all(lgr);
	}
}
/* terminate link group
 * @soft: true if link group shutdown can take its time
 *	  false if immediate link group shutdown is required
 */
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	if (lgr->terminating)
		return;	/* lgr already terminating */
	/* cancel free_work sync, will terminate when lgr->freeing is set */
	cancel_delayed_work_sync(&lgr->free_work);
	lgr->terminating = 1;

	/* kill remaining link group connections */
	read_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		read_unlock_bh(&lgr->conns_lock);
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put below */
		lock_sock(&smc->sk);
		smc_conn_kill(conn, soft);
		release_sock(&smc->sk);
		sock_put(&smc->sk); /* sock_hold above */
		read_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	read_unlock_bh(&lgr->conns_lock);
	smc_lgr_cleanup(lgr);
	smc_lgr_free(lgr);
}
/* unlink link group and schedule termination */
void smc_lgr_terminate_sched(struct smc_link_group *lgr)
{
	spinlock_t *lgr_lock;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;	/* lgr already terminating */
	}
	list_del_init(&lgr->list);
	lgr->freeing = 1;
	spin_unlock_bh(lgr_lock);
	schedule_work(&lgr->terminate_work);
}
/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);

	/* run common cleanup function and build free list */
	spin_lock_bh(&dev->lgr_lock);
	list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
		if ((!peer_gid || lgr->peer_gid == peer_gid) &&
		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
			if (peer_gid) /* peer triggered termination */
				lgr->peer_shutdown = 1;
			list_move(&lgr->list, &lgr_free_list);
			lgr->freeing = 1;
		}
	}
	spin_unlock_bh(&dev->lgr_lock);

	/* cancel the regular free workers and actually free lgrs */
	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		schedule_work(&lgr->terminate_work);
	}
}
/* Called when an SMCD device is removed or the smc module is unloaded */
void smc_smcd_terminate_all(struct smcd_dev *smcd)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);

	spin_lock_bh(&smcd->lgr_lock);
	list_splice_init(&smcd->lgr_list, &lgr_free_list);
	list_for_each_entry(lgr, &lgr_free_list, list)
		lgr->freeing = 1;
	spin_unlock_bh(&smcd->lgr_lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		__smc_lgr_terminate(lgr, false);
	}

	if (atomic_read(&smcd->lgr_cnt))
		wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
}
/* Called when an SMCR device is removed or the smc module is unloaded.
 * If smcibdev is given, all SMCR link groups using this device are terminated.
 * If smcibdev is NULL, all SMCR link groups are terminated.
 */
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);
	int i;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!smcibdev) {
		list_splice_init(&smc_lgr_list.list, &lgr_free_list);
		list_for_each_entry(lgr, &lgr_free_list, list)
			lgr->freeing = 1;
	} else {
		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
				if (lgr->lnk[i].smcibdev == smcibdev)
					smcr_link_down_cond_sched(&lgr->lnk[i]);
			}
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
		__smc_lgr_terminate(lgr, false);
	}

	if (smcibdev) {
		if (atomic_read(&smcibdev->lnk_cnt))
			wait_event(smcibdev->lnks_deleted,
				   !atomic_read(&smcibdev->lnk_cnt));
	} else {
		if (atomic_read(&lgr_cnt))
			wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
	}
}
/* set new lgr type and clear all asymmetric link tagging */
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
{
	char *lgr_type = "";
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		if (smc_link_usable(&lgr->lnk[i]))
			lgr->lnk[i].link_is_asym = false;
	if (lgr->type == new_type)
		return;
	lgr->type = new_type;

	switch (lgr->type) {
	case SMC_LGR_NONE:
		lgr_type = "NONE";
		break;
	case SMC_LGR_SINGLE:
		lgr_type = "SINGLE";
		break;
	case SMC_LGR_SYMMETRIC:
		lgr_type = "SYMMETRIC";
		break;
	case SMC_LGR_ASYMMETRIC_PEER:
		lgr_type = "ASYMMETRIC_PEER";
		break;
	case SMC_LGR_ASYMMETRIC_LOCAL:
		lgr_type = "ASYMMETRIC_LOCAL";
		break;
	}
	pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu state changed: "
			    "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
			    lgr->net->net_cookie, lgr_type, lgr->pnet_id);
}
/* set new lgr type and tag a link as asymmetric */
void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
			    enum smc_lgr_type new_type, int asym_lnk_idx)
{
	smcr_lgr_set_type(lgr, new_type);
	lgr->lnk[asym_lnk_idx].link_is_asym = true;
}
/* abort connection, abort_work scheduled from tasklet context */
static void smc_conn_abort_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   abort_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	lock_sock(&smc->sk);
	smc_conn_kill(conn, true);
	release_sock(&smc->sk);
	sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		struct smc_link *link;

		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN) ||
		    lgr->type == SMC_LGR_SYMMETRIC ||
		    lgr->type == SMC_LGR_ASYMMETRIC_PEER ||
		    !rdma_dev_access_netns(smcibdev->ibdev, lgr->net))
			continue;

		/* trigger local add link processing */
		link = smc_llc_usable_link(lgr);
		if (link)
			smc_llc_add_link_local(link);
	}
}
/* link is down - switch connections to alternate link,
 * must be called under lgr->llc_conf_mutex lock
 */
static void smcr_link_down(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_link *to_lnk;
	int del_link_id;

	if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
		return;

	to_lnk = smc_switch_conns(lgr, lnk, true);
	if (!to_lnk) { /* no backup link available */
		smcr_link_clear(lnk, true);
		return;
	}
	smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
	del_link_id = lnk->link_id;

	if (lgr->role == SMC_SERV) {
		/* trigger local delete link processing */
		smc_llc_srv_delete_link_local(to_lnk, del_link_id);
	} else {
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* another llc task is ongoing */
			mutex_unlock(&lgr->llc_conf_mutex);
			wait_event_timeout(lgr->llc_flow_waiter,
					   (list_empty(&lgr->list) ||
					    lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
					   SMC_LLC_WAIT_TIME);
			mutex_lock(&lgr->llc_conf_mutex);
		}
		if (!list_empty(&lgr->list)) {
			smc_llc_send_delete_link(to_lnk, del_link_id,
						 SMC_LLC_REQ, true,
						 SMC_LLC_DEL_LOST_PATH);
			smcr_link_clear(lnk, true);
		}
		wake_up(&lgr->llc_flow_waiter);	/* wake up next waiter */
	}
}
/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_down_cond(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state)) {
		trace_smcr_link_down(lnk, __builtin_return_address(0));
		smcr_link_down(lnk);
	}
}

/* will get the lgr->llc_conf_mutex lock */
void smcr_link_down_cond_sched(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state)) {
		trace_smcr_link_down(lnk, __builtin_return_address(0));
		schedule_work(&lnk->link_down_wrk);
	}
}
void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;
	int i;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN))
			continue; /* lgr is not affected */
		if (list_empty(&lgr->list))
			continue;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *lnk = &lgr->lnk[i];

			if (smc_link_usable(lnk) &&
			    lnk->smcibdev == smcibdev && lnk->ibport == ibport)
				smcr_link_down_cond_sched(lnk);
		}
	}
}
static void smc_link_down_work(struct work_struct *work)
{
	struct smc_link *link = container_of(work, struct smc_link,
					     link_down_wrk);
	struct smc_link_group *lgr = link->lgr;

	if (list_empty(&lgr->list))
		return;
	wake_up_all(&lgr->llc_msg_waiter);
	mutex_lock(&lgr->llc_conf_mutex);
	smcr_link_down(link);
	mutex_unlock(&lgr->llc_conf_mutex);
}
static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
				  struct netdev_nested_priv *priv)
{
	unsigned short *vlan_id = (unsigned short *)priv->data;

	if (is_vlan_dev(lower_dev)) {
		*vlan_id = vlan_dev_vlan_id(lower_dev);
		return 1;
	}

	return 0;
}

/* Determine vlan of internal TCP socket. */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct netdev_nested_priv priv;
	struct net_device *ndev;
	int rc = 0;

	ini->vlan_id = 0;
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	ndev = dst->dev;
	if (is_vlan_dev(ndev)) {
		ini->vlan_id = vlan_dev_vlan_id(ndev);
		goto out_rel;
	}

	priv.data = (void *)&ini->vlan_id;
	rtnl_lock();
	netdev_walk_all_lower_dev(ndev, smc_vlan_by_tcpsk_walk, &priv);
	rtnl_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}
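
/* An existing SMC-R link group can only be reused for a new connection
 * if peer system id and role match, an active link carries the expected
 * peer GID (and peer MAC for SMC-R v1, resp. QP number on the client
 * side), and the link's IB device is reachable in the caller's network
 * namespace.
 */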
static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
			   u8 peer_systemid[],
			   u8 peer_gid[],
			   u8 peer_mac_v1[],
			   enum smc_lgr_role role, u32 clcqpn,
			   struct net *net)
{
	struct smc_link *lnk;
	int i;

	if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) ||
	    lgr->role != role)
		return false;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		lnk = &lgr->lnk[i];

		if (!smc_link_active(lnk))
			continue;
		/* use verbs API to check netns, instead of lgr->net */
		if (!rdma_dev_access_netns(lnk->smcibdev->ibdev, net))
			return false;
		if ((lgr->role == SMC_SERV || lnk->peer_qpn == clcqpn) &&
		    !memcmp(lnk->peer_gid, peer_gid, SMC_GID_SIZE) &&
		    (smcr_version == SMC_V2 ||
		     !memcmp(lnk->peer_mac, peer_mac_v1, ETH_ALEN)))
			return true;
	}
	return false;
}

static bool smcd_lgr_match(struct smc_link_group *lgr,
			   struct smcd_dev *smcismdev, u64 peer_gid)
{
	return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}
/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_connection *conn = &smc->conn;
	struct net *net = sock_net(&smc->sk);
	struct list_head *lgr_list;
	struct smc_link_group *lgr;
	enum smc_lgr_role role;
	spinlock_t *lgr_lock;
	int rc = 0;

	lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list :
				  &smc_lgr_list.list;
	lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock :
				  &smc_lgr_list.lock;
	ini->first_contact_local = 1;
	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	if (role == SMC_CLNT && ini->first_contact_peer)
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(lgr_lock);
	list_for_each_entry(lgr, lgr_list, list) {
		write_lock_bh(&lgr->conns_lock);
		if ((ini->is_smcd ?
		     smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
				    ini->ism_peer_gid[ini->ism_selected]) :
		     smcr_lgr_match(lgr, ini->smcr_version,
				    ini->peer_systemid,
				    ini->peer_gid, ini->peer_mac, role,
				    ini->ib_clcqpn, net)) &&
		    !lgr->sync_err &&
		    (ini->smcd_version == SMC_V2 ||
		     lgr->vlan_id == ini->vlan_id) &&
		    (role == SMC_CLNT || ini->is_smcd ||
		     (lgr->conns_num < SMC_RMBS_PER_LGR_MAX &&
		      !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
			/* link group found */
			ini->first_contact_local = 0;
			conn->lgr = lgr;
			rc = smc_lgr_register_conn(conn, false);
			write_unlock_bh(&lgr->conns_lock);
			if (!rc && delayed_work_pending(&lgr->free_work))
				cancel_delayed_work(&lgr->free_work);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(lgr_lock);
	if (rc)
		return rc;

	if (role == SMC_CLNT && !ini->first_contact_peer &&
	    ini->first_contact_local) {
		/* Server reuses a link group, but Client wants to start
		 * a new one
		 * send out_of_sync decline, reason synchr. error
		 */
		return SMC_CLC_DECL_SYNCERR;
	}

create:
	if (ini->first_contact_local) {
		rc = smc_lgr_create(smc, ini);
		if (rc)
			goto out;
		lgr = conn->lgr;
		write_lock_bh(&lgr->conns_lock);
		rc = smc_lgr_register_conn(conn, true);
		write_unlock_bh(&lgr->conns_lock);
		if (rc) {
			smc_lgr_cleanup_early(lgr);
			goto out;
		}
	}
	smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
	if (!conn->lgr->is_smcd)
		smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */
	conn->freed = 0;
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
	conn->urg_state = SMC_URG_READ;
	init_waitqueue_head(&conn->cdc_pend_tx_wq);
	INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
	if (ini->is_smcd) {
		conn->rx_off = sizeof(struct smcd_cdc_msg);
		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
	} else {
		conn->rx_off = 0;
	}
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif

out:
	return rc;
}
#define SMCD_DMBE_SIZES		6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
#define SMCR_RMBE_SIZES		5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */

/* convert the RMB size into the compressed notation (minimum 16K, see
 * SMCD/R_DMBE_SIZES.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
{
	const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE;
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	size = (size - 1) >> 14;  /* convert to 16K multiple */
	compressed = min_t(u8, ilog2(size) + 1,
			   is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);

	if (!is_smcd && is_rmb)
		/* RMBs are backed by & limited to max size of scatterlists */
		compressed = min_t(u8, compressed, ilog2(max_scat >> 14));

	return compressed;
}
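
/* Example: a requested size of 70000 bytes becomes (70000 - 1) >> 14 = 4,
 * ilog2(4) + 1 = 3, and smc_uncompress_bufsize(3) yields 1 << (3 + 14) =
 * 128KB - the next power of two that still satisfies the request.
 */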
/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
	u32 size;

	size = 0x00000001 << (((int)compressed) + 14);
	return (int)size;
}
/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     struct mutex *lock,
					     struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	mutex_lock(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			mutex_unlock(lock);
			return buf_slot;
		}
	}
	mutex_unlock(lock);
	return NULL;
}
/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
/* map an rmb buf to a link */
static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
			     struct smc_link *lnk)
{
	int rc;

	if (buf_desc->is_map_ib[lnk->link_idx])
		return 0;

	rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
	if (rc)
		return rc;
	sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
		   buf_desc->cpu_addr, buf_desc->len);

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk, buf_desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != 1) {
		rc = -EAGAIN;
		goto free_table;
	}

	/* create a new memory region for the RMB */
	if (is_rmb) {
		rc = smc_ib_get_memory_region(lnk->roce_pd,
					      IB_ACCESS_REMOTE_WRITE |
					      IB_ACCESS_LOCAL_WRITE,
					      buf_desc, lnk->link_idx);
		if (rc)
			goto buf_unmap;
		smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
	}
	buf_desc->is_map_ib[lnk->link_idx] = true;
	return 0;

buf_unmap:
	smc_ib_buf_unmap_sg(lnk, buf_desc,
			    is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
free_table:
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	return rc;
}
/* register a new rmb on IB device,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
{
	if (list_empty(&link->lgr->list))
		return -ENOLINK;
	if (!rmb_desc->is_reg_mr[link->link_idx]) {
		/* register memory region for new rmb */
		if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
			rmb_desc->is_reg_err = true;
			return -EFAULT;
		}
		rmb_desc->is_reg_mr[link->link_idx] = true;
	}
	return 0;
}
static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
			     struct list_head *lst, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf;
	int rc = 0;

	mutex_lock(lock);
	list_for_each_entry_safe(buf_desc, bf, lst, list) {
		if (!buf_desc->used)
			continue;
		rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
		if (rc)
			goto out;
	}
out:
	mutex_unlock(lock);
	return rc;
}
/* map all used buffers of lgr for a new link */
int smcr_buf_map_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i, rc = 0;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
				       &lgr->rmbs[i], true);
		if (rc)
			return rc;
		rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
				       &lgr->sndbufs[i], false);
		if (rc)
			return rc;
	}
	return 0;
}
/* register all used buffers of lgr for a new link,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_buf_reg_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i, rc = 0;

	mutex_lock(&lgr->rmbs_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
			if (!buf_desc->used)
				continue;
			rc = smcr_link_reg_rmb(lnk, buf_desc);
			if (rc)
				goto out;
		}
	}
out:
	mutex_unlock(&lgr->rmbs_lock);
	return rc;
}
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
						bool is_rmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	buf_desc->order = get_order(bufsize);
	buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
				      __GFP_NOMEMALLOC | __GFP_COMP |
				      __GFP_NORETRY | __GFP_ZERO,
				      buf_desc->order);
	if (!buf_desc->pages) {
		kfree(buf_desc);
		return ERR_PTR(-EAGAIN);
	}
	buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
	buf_desc->len = bufsize;
	return buf_desc;
}
/* map buf_desc on all usable links,
 * unused buffers stay mapped as long as the link is up
 */
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
				     struct smc_buf_desc *buf_desc, bool is_rmb)
{
	int i, rc = 0;

	/* protect against parallel link reconfiguration */
	mutex_lock(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (!smc_link_usable(lnk))
			continue;
		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
			rc = -ENOMEM;
			goto out;
		}
	}
out:
	mutex_unlock(&lgr->llc_conf_mutex);
	return rc;
}
static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
						bool is_dmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	int rc;

	/* try to alloc a new DMB */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);
	if (is_dmb) {
		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
		if (rc) {
			kfree(buf_desc);
			if (rc == -ENOMEM)
				return ERR_PTR(-EAGAIN);
			if (rc == -ENOSPC)
				return ERR_PTR(-ENOSPC);
			return ERR_PTR(-EIO);
		}
		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
		/* CDC header stored in buf. So, pretend it was smaller */
		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
	} else {
		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
					     __GFP_NOWARN | __GFP_NORETRY |
					     __GFP_NOMEMALLOC);
		if (!buf_desc->cpu_addr) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->len = bufsize;
	}
	return buf_desc;
}
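
/* Allocate (or reuse) a send buffer or RMB for a connection. Starting
 * from the compressed size derived from the socket buffer size, smaller
 * sizes are tried one by one ("downgrade") until a slot can be reused
 * or a new buffer can be allocated.
 */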
static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_short;
	bool is_dgraded = false;
	struct mutex *lock;	/* lock buffer list */
	int sk_buf_size;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_sndbuf / 2;

	for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
	     bufsize_short >= 0; bufsize_short--) {
		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_short];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_short];
		}
		bufsize = smc_uncompress_bufsize(bufsize_short);

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
		if (buf_desc) {
			SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
			SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
			break; /* found reusable slot */
		}

		if (is_smcd)
			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
		else
			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc)) {
			if (!is_dgraded) {
				is_dgraded = true;
				SMC_STAT_RMB_DOWNGRADED(smc, is_smcd, is_rmb);
			}
			continue;
		}

		SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
		SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
		buf_desc->used = 1;
		mutex_lock(lock);
		list_add(&buf_desc->list, buf_list);
		mutex_unlock(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return PTR_ERR(buf_desc);

	if (!is_smcd) {
		if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
			smcr_buf_unuse(buf_desc, lgr);
			return -ENOMEM;
		}
	}

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_short = bufsize_short;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit =
			smc_rmb_wnd_update_limit(buf_desc->len);
		if (is_smcd)
			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
	if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
	    !smc_link_active(conn->lnk))
		return;
	smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
	    !smc_link_active(conn->lnk))
		return;
	smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	int i;

	if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
				       DMA_FROM_DEVICE);
	}
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
	int i;

	if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
					  DMA_FROM_DEVICE);
	}
}
/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
	int rc;

	/* create send buffer */
	rc = __smc_buf_create(smc, is_smcd, false);
	if (rc)
		return rc;
	/* create rmb */
	rc = __smc_buf_create(smc, is_smcd, true);
	if (rc) {
		mutex_lock(&smc->conn.lgr->sndbufs_lock);
		list_del(&smc->conn.sndbuf_desc->list);
		mutex_unlock(&smc->conn.lgr->sndbufs_lock);
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
		smc->conn.sndbuf_desc = NULL;
	}
	return rc;
}
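
/* The rtokens[][] table stores, per RMB and per link, the rkey and DMA
 * address advertised by the peer; rtokens_used_mask tracks which RMB
 * slots are in use.
 */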
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}

static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
				   u32 rkey)
{
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (test_bit(i, lgr->rtokens_used_mask) &&
		    lgr->rtokens[i][lnk_idx].rkey == rkey)
			return i;
	}
	return -ENOENT;
}
/* set rtoken for a new link to an existing rmb */
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
{
	int rtok_idx;

	rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
	if (rtok_idx == -ENOENT)
		return;
	lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
	lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
}
/* set rtoken for a new link whose link_id is given */
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
		     __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	bool found = false;
	int link_idx;

	for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
		if (lgr->lnk[link_idx].link_id == link_id) {
			found = true;
			break;
		}
	}
	if (!found)
		return;
	lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
	lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
}
2422 int smc_rtoken_add(struct smc_link
*lnk
, __be64 nw_vaddr
, __be32 nw_rkey
)
2424 struct smc_link_group
*lgr
= smc_get_lgr(lnk
);
2425 u64 dma_addr
= be64_to_cpu(nw_vaddr
);
2426 u32 rkey
= ntohl(nw_rkey
);
2429 for (i
= 0; i
< SMC_RMBS_PER_LGR_MAX
; i
++) {
2430 if (lgr
->rtokens
[i
][lnk
->link_idx
].rkey
== rkey
&&
2431 lgr
->rtokens
[i
][lnk
->link_idx
].dma_addr
== dma_addr
&&
2432 test_bit(i
, lgr
->rtokens_used_mask
)) {
2433 /* already in list */
2437 i
= smc_rmb_reserve_rtoken_idx(lgr
);
2440 lgr
->rtokens
[i
][lnk
->link_idx
].rkey
= rkey
;
2441 lgr
->rtokens
[i
][lnk
->link_idx
].dma_addr
= dma_addr
;
/* delete an rtoken from all links */
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u32 rkey = ntohl(nw_rkey);
	int i, j;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
				lgr->rtokens[i][j].rkey = 0;
				lgr->rtokens[i][j].dma_addr = 0;
			}
			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}
/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_link *lnk,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr,
					  clc->r0.rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}
static void smc_core_going_away(void)
{
	struct smc_ib_device *smcibdev;
	struct smcd_dev *smcd;

	mutex_lock(&smc_ib_devices.mutex);
	list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
		int i;

		for (i = 0; i < SMC_MAX_PORTS; i++)
			set_bit(i, smcibdev->ports_going_away);
	}
	mutex_unlock(&smc_ib_devices.mutex);

	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		smcd->going_away = 1;
	}
	mutex_unlock(&smcd_dev_list.mutex);
}
/* Clean up all SMC link groups */
static void smc_lgrs_shutdown(void)
{
	struct smcd_dev *smcd;

	smc_core_going_away();

	smc_smcr_terminate_all(NULL);

	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list)
		smc_smcd_terminate_all(smcd);
	mutex_unlock(&smcd_dev_list.mutex);
}
static int smc_core_reboot_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	smc_lgrs_shutdown();
	smc_ib_unregister_client();
	return 0;
}

static struct notifier_block smc_reboot_notifier = {
	.notifier_call = smc_core_reboot_event,
};

int __init smc_core_init(void)
{
	return register_reboot_notifier(&smc_reboot_notifier);
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
	unregister_reboot_notifier(&smc_reboot_notifier);
	smc_lgrs_shutdown();
}