/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Generic netlink support functions to configure an SMC-R PNET table
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Thomas Richter <tmricht@linux.vnet.ibm.com>
 */
11 #include <linux/module.h>
12 #include <linux/list.h>
13 #include <linux/ctype.h>
14 #include <net/netlink.h>
15 #include <net/genetlink.h>
17 #include <uapi/linux/if.h>
18 #include <uapi/linux/smc.h>
20 #include <rdma/ib_verbs.h>
25 #define SMC_MAX_PNET_ID_LEN 16 /* Max. length of PNET id */
27 static struct nla_policy smc_pnet_policy
[SMC_PNETID_MAX
+ 1] = {
29 .type
= NLA_NUL_STRING
,
30 .len
= SMC_MAX_PNET_ID_LEN
- 1
32 [SMC_PNETID_ETHNAME
] = {
33 .type
= NLA_NUL_STRING
,
36 [SMC_PNETID_IBNAME
] = {
37 .type
= NLA_NUL_STRING
,
38 .len
= IB_DEVICE_NAME_MAX
- 1
40 [SMC_PNETID_IBPORT
] = { .type
= NLA_U8
}
43 static struct genl_family smc_pnet_nl_family
;
46 * struct smc_pnettable - SMC PNET table anchor
47 * @lock: Lock for list action
48 * @pnetlist: List of PNETIDs
50 static struct smc_pnettable
{
52 struct list_head pnetlist
;
54 .pnetlist
= LIST_HEAD_INIT(smc_pnettable
.pnetlist
),
55 .lock
= __RW_LOCK_UNLOCKED(smc_pnettable
.lock
)
59 * struct smc_pnetentry - pnet identifier name entry
61 * @pnet_name: Pnet identifier name
62 * @ndev: pointer to network device.
63 * @smcibdev: Pointer to IB device.
65 struct smc_pnetentry
{
66 struct list_head list
;
67 char pnet_name
[SMC_MAX_PNET_ID_LEN
+ 1];
68 struct net_device
*ndev
;
69 struct smc_ib_device
*smcibdev
;
73 /* Check if two RDMA device entries are identical. Use device name and port
74 * number for comparison.
76 static bool smc_pnet_same_ibname(struct smc_pnetentry
*pnetelem
, char *ibname
,
79 return pnetelem
->ib_port
== ibport
&&
80 !strncmp(pnetelem
->smcibdev
->ibdev
->name
, ibname
,
81 sizeof(pnetelem
->smcibdev
->ibdev
->name
));
84 /* Find a pnetid in the pnet table.
86 static struct smc_pnetentry
*smc_pnet_find_pnetid(char *pnet_name
)
88 struct smc_pnetentry
*pnetelem
, *found_pnetelem
= NULL
;
90 read_lock(&smc_pnettable
.lock
);
91 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
92 if (!strncmp(pnetelem
->pnet_name
, pnet_name
,
93 sizeof(pnetelem
->pnet_name
))) {
94 found_pnetelem
= pnetelem
;
98 read_unlock(&smc_pnettable
.lock
);
99 return found_pnetelem
;
102 /* Remove a pnetid from the pnet table.
104 static int smc_pnet_remove_by_pnetid(char *pnet_name
)
106 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
109 write_lock(&smc_pnettable
.lock
);
110 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
112 if (!strncmp(pnetelem
->pnet_name
, pnet_name
,
113 sizeof(pnetelem
->pnet_name
))) {
114 list_del(&pnetelem
->list
);
115 dev_put(pnetelem
->ndev
);
121 write_unlock(&smc_pnettable
.lock
);
125 /* Remove a pnet entry mentioning a given network device from the pnet table.
127 static int smc_pnet_remove_by_ndev(struct net_device
*ndev
)
129 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
132 write_lock(&smc_pnettable
.lock
);
133 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
135 if (pnetelem
->ndev
== ndev
) {
136 list_del(&pnetelem
->list
);
137 dev_put(pnetelem
->ndev
);
143 write_unlock(&smc_pnettable
.lock
);
147 /* Remove a pnet entry mentioning a given ib device from the pnet table.
149 int smc_pnet_remove_by_ibdev(struct smc_ib_device
*ibdev
)
151 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
154 write_lock(&smc_pnettable
.lock
);
155 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
157 if (pnetelem
->smcibdev
== ibdev
) {
158 list_del(&pnetelem
->list
);
159 dev_put(pnetelem
->ndev
);
165 write_unlock(&smc_pnettable
.lock
);
169 /* Append a pnetid to the end of the pnet table if not already on this list.
171 static int smc_pnet_enter(struct smc_pnetentry
*new_pnetelem
)
173 struct smc_pnetentry
*pnetelem
;
176 write_lock(&smc_pnettable
.lock
);
177 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
178 if (!strncmp(pnetelem
->pnet_name
, new_pnetelem
->pnet_name
,
179 sizeof(new_pnetelem
->pnet_name
)) ||
180 !strncmp(pnetelem
->ndev
->name
, new_pnetelem
->ndev
->name
,
181 sizeof(new_pnetelem
->ndev
->name
)) ||
182 smc_pnet_same_ibname(pnetelem
,
183 new_pnetelem
->smcibdev
->ibdev
->name
,
184 new_pnetelem
->ib_port
)) {
185 dev_put(pnetelem
->ndev
);
189 list_add_tail(&new_pnetelem
->list
, &smc_pnettable
.pnetlist
);
192 write_unlock(&smc_pnettable
.lock
);
196 /* The limit for pnetid is 16 characters.
197 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
198 * Lower case letters are converted to upper case.
199 * Interior blanks should not be used.
201 static bool smc_pnetid_valid(const char *pnet_name
, char *pnetid
)
203 char *bf
= skip_spaces(pnet_name
);
204 size_t len
= strlen(bf
);
205 char *end
= bf
+ len
;
209 while (--end
>= bf
&& isspace(*end
))
211 if (end
- bf
>= SMC_MAX_PNET_ID_LEN
)
216 *pnetid
++ = islower(*bf
) ? toupper(*bf
) : *bf
;
223 /* Find an infiniband device by a given name. The device might not exist. */
224 static struct smc_ib_device
*smc_pnet_find_ib(char *ib_name
)
226 struct smc_ib_device
*ibdev
;
228 spin_lock(&smc_ib_devices
.lock
);
229 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
230 if (!strncmp(ibdev
->ibdev
->name
, ib_name
,
231 sizeof(ibdev
->ibdev
->name
))) {
237 spin_unlock(&smc_ib_devices
.lock
);
241 /* Parse the supplied netlink attributes and fill a pnetentry structure.
242 * For ethernet and infiniband device names verify that the devices exist.
244 static int smc_pnet_fill_entry(struct net
*net
, struct smc_pnetentry
*pnetelem
,
247 char *string
, *ibname
= NULL
;
250 memset(pnetelem
, 0, sizeof(*pnetelem
));
251 INIT_LIST_HEAD(&pnetelem
->list
);
252 if (tb
[SMC_PNETID_NAME
]) {
253 string
= (char *)nla_data(tb
[SMC_PNETID_NAME
]);
254 if (!smc_pnetid_valid(string
, pnetelem
->pnet_name
)) {
259 if (tb
[SMC_PNETID_ETHNAME
]) {
260 string
= (char *)nla_data(tb
[SMC_PNETID_ETHNAME
]);
261 pnetelem
->ndev
= dev_get_by_name(net
, string
);
265 if (tb
[SMC_PNETID_IBNAME
]) {
266 ibname
= (char *)nla_data(tb
[SMC_PNETID_IBNAME
]);
267 ibname
= strim(ibname
);
268 pnetelem
->smcibdev
= smc_pnet_find_ib(ibname
);
269 if (!pnetelem
->smcibdev
) {
274 if (tb
[SMC_PNETID_IBPORT
]) {
275 pnetelem
->ib_port
= nla_get_u8(tb
[SMC_PNETID_IBPORT
]);
276 if (pnetelem
->ib_port
> SMC_MAX_PORTS
) {
285 dev_put(pnetelem
->ndev
);
289 /* Convert an smc_pnetentry to a netlink attribute sequence */
290 static int smc_pnet_set_nla(struct sk_buff
*msg
, struct smc_pnetentry
*pnetelem
)
292 if (nla_put_string(msg
, SMC_PNETID_NAME
, pnetelem
->pnet_name
) ||
293 nla_put_string(msg
, SMC_PNETID_ETHNAME
, pnetelem
->ndev
->name
) ||
294 nla_put_string(msg
, SMC_PNETID_IBNAME
,
295 pnetelem
->smcibdev
->ibdev
->name
) ||
296 nla_put_u8(msg
, SMC_PNETID_IBPORT
, pnetelem
->ib_port
))
301 /* Retrieve one PNETID entry */
302 static int smc_pnet_get(struct sk_buff
*skb
, struct genl_info
*info
)
304 struct smc_pnetentry
*pnetelem
;
309 pnetelem
= smc_pnet_find_pnetid(
310 (char *)nla_data(info
->attrs
[SMC_PNETID_NAME
]));
313 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
317 hdr
= genlmsg_put(msg
, info
->snd_portid
, info
->snd_seq
,
318 &smc_pnet_nl_family
, 0, SMC_PNETID_GET
);
324 if (smc_pnet_set_nla(msg
, pnetelem
)) {
329 genlmsg_end(msg
, hdr
);
330 return genlmsg_reply(msg
, info
);
337 static int smc_pnet_add(struct sk_buff
*skb
, struct genl_info
*info
)
339 struct net
*net
= genl_info_net(info
);
340 struct smc_pnetentry
*pnetelem
;
343 pnetelem
= kzalloc(sizeof(*pnetelem
), GFP_KERNEL
);
346 rc
= smc_pnet_fill_entry(net
, pnetelem
, info
->attrs
);
348 rc
= smc_pnet_enter(pnetelem
);
353 rc
= smc_ib_remember_port_attr(pnetelem
->smcibdev
, pnetelem
->ib_port
);
355 smc_pnet_remove_by_pnetid(pnetelem
->pnet_name
);
359 static int smc_pnet_del(struct sk_buff
*skb
, struct genl_info
*info
)
361 return smc_pnet_remove_by_pnetid(
362 (char *)nla_data(info
->attrs
[SMC_PNETID_NAME
]));
365 static int smc_pnet_dump_start(struct netlink_callback
*cb
)
371 static int smc_pnet_dumpinfo(struct sk_buff
*skb
,
372 u32 portid
, u32 seq
, u32 flags
,
373 struct smc_pnetentry
*pnetelem
)
377 hdr
= genlmsg_put(skb
, portid
, seq
, &smc_pnet_nl_family
,
378 flags
, SMC_PNETID_GET
);
381 if (smc_pnet_set_nla(skb
, pnetelem
) < 0) {
382 genlmsg_cancel(skb
, hdr
);
385 genlmsg_end(skb
, hdr
);
389 static int smc_pnet_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
391 struct smc_pnetentry
*pnetelem
;
394 read_lock(&smc_pnettable
.lock
);
395 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
396 if (idx
++ < cb
->args
[0])
398 if (smc_pnet_dumpinfo(skb
, NETLINK_CB(cb
->skb
).portid
,
399 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
,
406 read_unlock(&smc_pnettable
.lock
);
410 /* Remove and delete all pnetids from pnet table.
412 static int smc_pnet_flush(struct sk_buff
*skb
, struct genl_info
*info
)
414 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
416 write_lock(&smc_pnettable
.lock
);
417 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
419 list_del(&pnetelem
->list
);
420 dev_put(pnetelem
->ndev
);
423 write_unlock(&smc_pnettable
.lock
);
427 /* SMC_PNETID generic netlink operation definition */
428 static const struct genl_ops smc_pnet_ops
[] = {
430 .cmd
= SMC_PNETID_GET
,
431 .flags
= GENL_ADMIN_PERM
,
432 .policy
= smc_pnet_policy
,
433 .doit
= smc_pnet_get
,
434 .dumpit
= smc_pnet_dump
,
435 .start
= smc_pnet_dump_start
438 .cmd
= SMC_PNETID_ADD
,
439 .flags
= GENL_ADMIN_PERM
,
440 .policy
= smc_pnet_policy
,
444 .cmd
= SMC_PNETID_DEL
,
445 .flags
= GENL_ADMIN_PERM
,
446 .policy
= smc_pnet_policy
,
450 .cmd
= SMC_PNETID_FLUSH
,
451 .flags
= GENL_ADMIN_PERM
,
452 .policy
= smc_pnet_policy
,
453 .doit
= smc_pnet_flush
457 /* SMC_PNETID family definition */
458 static struct genl_family smc_pnet_nl_family
= {
460 .name
= SMCR_GENL_FAMILY_NAME
,
461 .version
= SMCR_GENL_FAMILY_VERSION
,
462 .maxattr
= SMC_PNETID_MAX
,
464 .module
= THIS_MODULE
,
466 .n_ops
= ARRAY_SIZE(smc_pnet_ops
)
469 static int smc_pnet_netdev_event(struct notifier_block
*this,
470 unsigned long event
, void *ptr
)
472 struct net_device
*event_dev
= netdev_notifier_info_to_dev(ptr
);
476 case NETDEV_UNREGISTER
:
477 smc_pnet_remove_by_ndev(event_dev
);
484 static struct notifier_block smc_netdev_notifier
= {
485 .notifier_call
= smc_pnet_netdev_event
488 int __init
smc_pnet_init(void)
492 rc
= genl_register_family(&smc_pnet_nl_family
);
495 rc
= register_netdevice_notifier(&smc_netdev_notifier
);
497 genl_unregister_family(&smc_pnet_nl_family
);
501 void smc_pnet_exit(void)
503 smc_pnet_flush(NULL
, NULL
);
504 unregister_netdevice_notifier(&smc_netdev_notifier
);
505 genl_unregister_family(&smc_pnet_nl_family
);
508 /* PNET table analysis for a given sock:
509 * determine ib_device and port belonging to used internal TCP socket
510 * ethernet interface.
512 void smc_pnet_find_roce_resource(struct sock
*sk
,
513 struct smc_ib_device
**smcibdev
, u8
*ibport
)
515 struct dst_entry
*dst
= sk_dst_get(sk
);
516 struct smc_pnetentry
*pnetelem
;
525 read_lock(&smc_pnettable
.lock
);
526 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
527 if (dst
->dev
== pnetelem
->ndev
) {
528 if (smc_ib_port_active(pnetelem
->smcibdev
,
529 pnetelem
->ib_port
)) {
530 *smcibdev
= pnetelem
->smcibdev
;
531 *ibport
= pnetelem
->ib_port
;
536 read_unlock(&smc_pnettable
.lock
);