From: Mark Zhang Date: Mon, 30 Jun 2025 10:52:33 +0000 (+0300) Subject: RDMA/cma: Support IB service record resolution X-Git-Tag: v6.18-rc1~102^2~82 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a6404823fe20e06d4061bc63e0295b7165af4c14;p=thirdparty%2Flinux.git RDMA/cma: Support IB service record resolution Add a new UCMA command and the corresponding CMA implementation. Userspace can send this command to request service resolution based on a service name or ID. On a successful resolution, one or multiple service records are returned; the first one is used as the destination address by default. Two new CM events are added and returned to the caller accordingly: - RDMA_CM_EVENT_ADDRINFO_RESOLVED: Resolve succeeded; - RDMA_CM_EVENT_ADDRINFO_ERROR: Resolve failed. Internally two new CM states are added: - RDMA_CM_ADDRINFO_QUERY: CM is in the process of IB service resolution; - RDMA_CM_ADDRINFO_RESOLVED: CM has finished the resolve process. With these new states, besides the existing state transitions, two new flows are supported: 1. The default address is used: RDMA_CM_ADDR_BOUND -> RDMA_CM_ADDRINFO_QUERY -> RDMA_CM_ADDRINFO_RESOLVED -> RDMA_CM_ROUTE_QUERY 2. To use a different address: RDMA_CM_ADDR_BOUND -> RDMA_CM_ADDRINFO_QUERY -> RDMA_CM_ADDRINFO_RESOLVED -> RDMA_CM_ADDR_QUERY -> RDMA_CM_ADDR_RESOLVED -> RDMA_CM_ROUTE_QUERY In the 2nd case, resolve_addrinfo returns multiple records, and the user can call rdma_resolve_addr() with a record other than the first. 
Signed-off-by: Or Har-Toov Signed-off-by: Mark Zhang Reviewed-by: Vlad Dumitrescu Link: https://patch.msgid.link/b6e82ad75522a13b5efe4ff86da0e465aab04cc2.1751279794.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 9b471548e7ae..5b2d3ae3f9fc 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2076,6 +2076,7 @@ static void _destroy_id(struct rdma_id_private *id_priv, kfree(id_priv->id.route.path_rec); kfree(id_priv->id.route.path_rec_inbound); kfree(id_priv->id.route.path_rec_outbound); + kfree(id_priv->id.route.service_recs); put_net(id_priv->id.route.addr.dev_addr.net); kfree(id_priv); @@ -3382,13 +3383,18 @@ err1: int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms) { struct rdma_id_private *id_priv; + enum rdma_cm_state state; int ret; if (!timeout_ms) return -EINVAL; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) + state = id_priv->state; + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, + RDMA_CM_ROUTE_QUERY) && + !cma_comp_exch(id_priv, RDMA_CM_ADDRINFO_RESOLVED, + RDMA_CM_ROUTE_QUERY)) return -EINVAL; cma_id_get(id_priv); @@ -3409,7 +3415,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms) return 0; err: - cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); + cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, state); cma_id_put(id_priv); return ret; } @@ -5506,3 +5512,129 @@ static void __exit cma_cleanup(void) module_init(cma_init); module_exit(cma_cleanup); + +static void cma_query_ib_service_handler(int status, + struct sa_service_rec *recs, + unsigned int num_recs, void *context) +{ + struct cma_work *work = context; + struct rdma_id_private *id_priv = work->id; + struct sockaddr_ib *addr; + + if (status) + goto fail; + + if (!num_recs) { + status = -ENOENT; + goto fail; + } + + if 
(id_priv->id.route.service_recs) { + status = -EALREADY; + goto fail; + } + + id_priv->id.route.service_recs = + kmalloc_array(num_recs, sizeof(*recs), GFP_KERNEL); + if (!id_priv->id.route.service_recs) { + status = -ENOMEM; + goto fail; + } + + id_priv->id.route.num_service_recs = num_recs; + memcpy(id_priv->id.route.service_recs, recs, sizeof(*recs) * num_recs); + + addr = (struct sockaddr_ib *)&id_priv->id.route.addr.dst_addr; + addr->sib_family = AF_IB; + addr->sib_addr = *(struct ib_addr *)&recs->gid; + addr->sib_pkey = recs->pkey; + addr->sib_sid = recs->id; + rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, + (union ib_gid *)&addr->sib_addr); + ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, + ntohs(addr->sib_pkey)); + + queue_work(cma_wq, &work->work); + return; + +fail: + work->old_state = RDMA_CM_ADDRINFO_QUERY; + work->new_state = RDMA_CM_ADDR_BOUND; + work->event.event = RDMA_CM_EVENT_ADDRINFO_ERROR; + work->event.status = status; + pr_debug_ratelimited( + "RDMA CM: SERVICE_ERROR: failed to query service record. 
status %d\n", + status); + queue_work(cma_wq, &work->work); +} + +static int cma_resolve_ib_service(struct rdma_id_private *id_priv, + struct rdma_ucm_ib_service *ibs) +{ + struct sa_service_rec sr = {}; + ib_sa_comp_mask mask = 0; + struct cma_work *work; + + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return -ENOMEM; + + cma_id_get(id_priv); + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler); + work->old_state = RDMA_CM_ADDRINFO_QUERY; + work->new_state = RDMA_CM_ADDRINFO_RESOLVED; + work->event.event = RDMA_CM_EVENT_ADDRINFO_RESOLVED; + + if (ibs->flags & RDMA_USER_CM_IB_SERVICE_FLAG_ID) { + sr.id = cpu_to_be64(ibs->service_id); + mask |= IB_SA_SERVICE_REC_SERVICE_ID; + } + if (ibs->flags & RDMA_USER_CM_IB_SERVICE_FLAG_NAME) { + strscpy(sr.name, ibs->service_name, sizeof(sr.name)); + mask |= IB_SA_SERVICE_REC_SERVICE_NAME; + } + + id_priv->query_id = ib_sa_service_rec_get(&sa_client, + id_priv->id.device, + id_priv->id.port_num, + &sr, mask, + 2000, GFP_KERNEL, + cma_query_ib_service_handler, + work, &id_priv->query); + + if (id_priv->query_id < 0) { + cma_id_put(id_priv); + kfree(work); + return id_priv->query_id; + } + + return 0; +} + +int rdma_resolve_ib_service(struct rdma_cm_id *id, + struct rdma_ucm_ib_service *ibs) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!id_priv->cma_dev || + !cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDRINFO_QUERY)) + return -EINVAL; + + if (rdma_cap_ib_sa(id->device, id->port_num)) + ret = cma_resolve_ib_service(id_priv, ibs); + else + ret = -EOPNOTSUPP; + + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, RDMA_CM_ADDRINFO_QUERY, RDMA_CM_ADDR_BOUND); + return ret; +} +EXPORT_SYMBOL(rdma_resolve_ib_service); diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index b7354c94cf1b..c604b601f4d9 100644 --- a/drivers/infiniband/core/cma_priv.h +++ 
b/drivers/infiniband/core/cma_priv.h @@ -47,7 +47,9 @@ enum rdma_cm_state { RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN, RDMA_CM_DEVICE_REMOVAL, - RDMA_CM_DESTROYING + RDMA_CM_DESTROYING, + RDMA_CM_ADDRINFO_QUERY, + RDMA_CM_ADDRINFO_RESOLVED }; struct rdma_id_private { diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 6e700b974033..1915f4e68308 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -282,6 +282,10 @@ static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx, } uevent->resp.event = event->event; uevent->resp.status = event->status; + + if (event->event == RDMA_CM_EVENT_ADDRINFO_RESOLVED) + goto out; + if (ctx->cm_id->qp_type == IB_QPT_UD) ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud, &event->param.ud); @@ -289,6 +293,7 @@ static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx, ucma_copy_conn_event(&uevent->resp.param.conn, &event->param.conn); +out: uevent->resp.ece.vendor_id = event->ece.vendor_id; uevent->resp.ece.attr_mod = event->ece.attr_mod; return uevent; @@ -728,6 +733,28 @@ static ssize_t ucma_resolve_addr(struct ucma_file *file, return ret; } +static ssize_t ucma_resolve_ib_service(struct ucma_file *file, + const char __user *inbuf, int in_len, + int out_len) +{ + struct rdma_ucm_resolve_ib_service cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + mutex_lock(&ctx->mutex); + ret = rdma_resolve_ib_service(ctx->cm_id, &cmd.ibs); + mutex_unlock(&ctx->mutex); + ucma_put_ctx(ctx); + return ret; +} + static ssize_t ucma_resolve_route(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) @@ -1703,7 +1730,8 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, [RDMA_USER_CM_CMD_QUERY] = ucma_query, [RDMA_USER_CM_CMD_BIND] = ucma_bind, [RDMA_USER_CM_CMD_RESOLVE_ADDR] = 
ucma_resolve_addr, - [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast + [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, + [RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE] = ucma_resolve_ib_service }; static ssize_t ucma_write(struct file *filp, const char __user *buf, diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index d1593ad47e28..72d1568e4cfb 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -33,7 +33,9 @@ enum rdma_cm_event_type { RDMA_CM_EVENT_MULTICAST_JOIN, RDMA_CM_EVENT_MULTICAST_ERROR, RDMA_CM_EVENT_ADDR_CHANGE, - RDMA_CM_EVENT_TIMEWAIT_EXIT + RDMA_CM_EVENT_TIMEWAIT_EXIT, + RDMA_CM_EVENT_ADDRINFO_RESOLVED, + RDMA_CM_EVENT_ADDRINFO_ERROR }; const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event); @@ -63,6 +65,9 @@ struct rdma_route { * 2 - Both primary and alternate path are available */ int num_pri_alt_paths; + + unsigned int num_service_recs; + struct sa_service_rec *service_recs; }; struct rdma_conn_param { @@ -197,6 +202,17 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, */ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms); +/** + * rdma_resolve_ib_service - Resolve the IB service record of the + * service with the given service ID or name. + * + * This function is optional in the rdma cm flow. It is called on the client + * side of a connection, before calling rdma_resolve_route. The resolution + * can be done once per rdma_cm_id. + */ +int rdma_resolve_ib_service(struct rdma_cm_id *id, + struct rdma_ucm_ib_service *ibs); + /** * rdma_create_qp - Allocate a QP and associate it with the specified RDMA * identifier. 
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 7cea03581f79..8799623bcba0 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -67,7 +67,8 @@ enum { RDMA_USER_CM_CMD_QUERY, RDMA_USER_CM_CMD_BIND, RDMA_USER_CM_CMD_RESOLVE_ADDR, - RDMA_USER_CM_CMD_JOIN_MCAST + RDMA_USER_CM_CMD_JOIN_MCAST, + RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE }; /* See IBTA Annex A11, servies ID bytes 4 & 5 */ @@ -338,4 +339,21 @@ struct rdma_ucm_migrate_resp { __u32 events_reported; }; +enum { + RDMA_USER_CM_IB_SERVICE_FLAG_ID = 1 << 0, + RDMA_USER_CM_IB_SERVICE_FLAG_NAME = 1 << 1, +}; + +#define RDMA_USER_CM_IB_SERVICE_NAME_SIZE 64 +struct rdma_ucm_ib_service { + __u64 service_id; + __u8 service_name[RDMA_USER_CM_IB_SERVICE_NAME_SIZE]; + __u32 flags; + __u32 reserved; +}; + +struct rdma_ucm_resolve_ib_service { + __u32 id; + struct rdma_ucm_ib_service ibs; +}; #endif /* RDMA_USER_CM_H */