From 534aebfe082a00a4ce6d30fe6316822ccda594d9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 4 Mar 2024 10:17:55 +0100 Subject: [PATCH] 6.1-stable patches added patches: rdma-core-refactor-rdma_bind_addr.patch rdma-core-update-cma-destination-address-on-rdma_resolve_addr.patch --- .../rdma-core-refactor-rdma_bind_addr.patch | 326 ++++++++++++++++++ ...ination-address-on-rdma_resolve_addr.patch | 53 +++ queue-6.1/series | 2 + 3 files changed, 381 insertions(+) create mode 100644 queue-6.1/rdma-core-refactor-rdma_bind_addr.patch create mode 100644 queue-6.1/rdma-core-update-cma-destination-address-on-rdma_resolve_addr.patch diff --git a/queue-6.1/rdma-core-refactor-rdma_bind_addr.patch b/queue-6.1/rdma-core-refactor-rdma_bind_addr.patch new file mode 100644 index 00000000000..b455bc204c1 --- /dev/null +++ b/queue-6.1/rdma-core-refactor-rdma_bind_addr.patch @@ -0,0 +1,326 @@ +From 8d037973d48c026224ab285e6a06985ccac6f7bf Mon Sep 17 00:00:00 2001 +From: Patrisious Haddad +Date: Wed, 4 Jan 2023 10:01:38 +0200 +Subject: RDMA/core: Refactor rdma_bind_addr + +From: Patrisious Haddad + +commit 8d037973d48c026224ab285e6a06985ccac6f7bf upstream. + +Refactor rdma_bind_addr function so that it doesn't require that the +cma destination address be changed before calling it. + +So now it will update the destination address internally only when it is +really needed and after passing all the required checks. + +Which in turn results in a cleaner and more sensible call and error +handling flows for the functions that call it directly or indirectly. + +Signed-off-by: Patrisious Haddad +Reported-by: Wei Chen +Reviewed-by: Mark Zhang +Link: https://lore.kernel.org/r/3d0e9a2fd62bc10ba02fed1c7c48a48638952320.1672819273.git.leonro@nvidia.com +Signed-off-by: Leon Romanovsky +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/core/cma.c | 253 +++++++++++++++++++++--------------------- + 1 file changed, 130 insertions(+), 123 deletions(-) + +--- a/drivers/infiniband/core/cma.c ++++ b/drivers/infiniband/core/cma.c +@@ -3547,121 +3547,6 @@ err: + return ret; + } + +-static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, +- const struct sockaddr *dst_addr) +-{ +- struct sockaddr_storage zero_sock = {}; +- +- if (src_addr && src_addr->sa_family) +- return rdma_bind_addr(id, src_addr); +- +- /* +- * When the src_addr is not specified, automatically supply an any addr +- */ +- zero_sock.ss_family = dst_addr->sa_family; +- if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { +- struct sockaddr_in6 *src_addr6 = +- (struct sockaddr_in6 *)&zero_sock; +- struct sockaddr_in6 *dst_addr6 = +- (struct sockaddr_in6 *)dst_addr; +- +- src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; +- if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) +- id->route.addr.dev_addr.bound_dev_if = +- dst_addr6->sin6_scope_id; +- } else if (dst_addr->sa_family == AF_IB) { +- ((struct sockaddr_ib *)&zero_sock)->sib_pkey = +- ((struct sockaddr_ib *)dst_addr)->sib_pkey; +- } +- return rdma_bind_addr(id, (struct sockaddr *)&zero_sock); +-} +- +-/* +- * If required, resolve the source address for bind and leave the id_priv in +- * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior +- * calls made by ULP, a previously bound ID will not be re-bound and src_addr is +- * ignored. +- */ +-static int resolve_prepare_src(struct rdma_id_private *id_priv, +- struct sockaddr *src_addr, +- const struct sockaddr *dst_addr) +-{ +- int ret; +- +- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); +- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { +- /* For a well behaved ULP state will be RDMA_CM_IDLE */ +- ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); +- if (ret) +- goto err_dst; +- if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, +- RDMA_CM_ADDR_QUERY))) { +- ret = -EINVAL; +- goto err_dst; +- } +- } +- +- if (cma_family(id_priv) != dst_addr->sa_family) { +- ret = -EINVAL; +- goto err_state; +- } +- return 0; +- +-err_state: +- cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); +-err_dst: +- memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); +- return ret; +-} +- +-int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, +- const struct sockaddr *dst_addr, unsigned long timeout_ms) +-{ +- struct rdma_id_private *id_priv = +- container_of(id, struct rdma_id_private, id); +- int ret; +- +- ret = resolve_prepare_src(id_priv, src_addr, dst_addr); +- if (ret) +- return ret; +- +- if (cma_any_addr(dst_addr)) { +- ret = cma_resolve_loopback(id_priv); +- } else { +- if (dst_addr->sa_family == AF_IB) { +- ret = cma_resolve_ib_addr(id_priv); +- } else { +- /* +- * The FSM can return back to RDMA_CM_ADDR_BOUND after +- * rdma_resolve_ip() is called, eg through the error +- * path in addr_handler(). If this happens the existing +- * request must be canceled before issuing a new one. +- * Since canceling a request is a bit slow and this +- * oddball path is rare, keep track once a request has +- * been issued. The track turns out to be a permanent +- * state since this is the only cancel as it is +- * immediately before rdma_resolve_ip(). +- */ +- if (id_priv->used_resolve_ip) +- rdma_addr_cancel(&id->route.addr.dev_addr); +- else +- id_priv->used_resolve_ip = 1; +- ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, +- &id->route.addr.dev_addr, +- timeout_ms, addr_handler, +- false, id_priv); +- } +- } +- if (ret) +- goto err; +- +- return 0; +-err: +- cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); +- return ret; +-} +-EXPORT_SYMBOL(rdma_resolve_addr); +- + int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) + { + struct rdma_id_private *id_priv; +@@ -4064,27 +3949,26 @@ err: + } + EXPORT_SYMBOL(rdma_listen); + +-int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) ++static int rdma_bind_addr_dst(struct rdma_id_private *id_priv, ++ struct sockaddr *addr, const struct sockaddr *daddr) + { +- struct rdma_id_private *id_priv; ++ struct sockaddr *id_daddr; + int ret; +- struct sockaddr *daddr; + + if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && + addr->sa_family != AF_IB) + return -EAFNOSUPPORT; + +- id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) + return -EINVAL; + +- ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); ++ ret = cma_check_linklocal(&id_priv->id.route.addr.dev_addr, addr); + if (ret) + goto err1; + + memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); + if (!cma_any_addr(addr)) { +- ret = cma_translate_addr(addr, &id->route.addr.dev_addr); ++ ret = cma_translate_addr(addr, &id_priv->id.route.addr.dev_addr); + if (ret) + goto err1; + +@@ -4104,8 +3988,10 @@ int rdma_bind_addr(struct rdma_cm_id *id + } + #endif + } +- daddr = cma_dst_addr(id_priv); +- daddr->sa_family = addr->sa_family; ++ id_daddr = cma_dst_addr(id_priv); ++ if (daddr != id_daddr) ++ memcpy(id_daddr, daddr, rdma_addr_size(addr)); ++ id_daddr->sa_family = addr->sa_family; + + ret = cma_get_port(id_priv); + if (ret) +@@ -4121,6 +4007,127 @@ err1: + cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); + return ret; + } ++ ++static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, ++ const struct sockaddr *dst_addr) ++{ ++ struct rdma_id_private *id_priv = ++ container_of(id, struct rdma_id_private, id); ++ struct sockaddr_storage zero_sock = {}; ++ ++ if (src_addr && src_addr->sa_family) ++ return rdma_bind_addr_dst(id_priv, src_addr, dst_addr); ++ ++ /* ++ * When the src_addr is not specified, automatically supply an any addr ++ */ ++ zero_sock.ss_family = dst_addr->sa_family; ++ if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { ++ struct sockaddr_in6 *src_addr6 = ++ (struct sockaddr_in6 *)&zero_sock; ++ struct sockaddr_in6 *dst_addr6 = ++ (struct sockaddr_in6 *)dst_addr; ++ ++ src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; ++ if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) ++ id->route.addr.dev_addr.bound_dev_if = ++ dst_addr6->sin6_scope_id; ++ } else if (dst_addr->sa_family == AF_IB) { ++ ((struct sockaddr_ib *)&zero_sock)->sib_pkey = ++ ((struct sockaddr_ib *)dst_addr)->sib_pkey; ++ } ++ return rdma_bind_addr_dst(id_priv, (struct sockaddr *)&zero_sock, dst_addr); ++} ++ ++/* ++ * If required, resolve the source address for bind and leave the id_priv in ++ * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior ++ * calls made by ULP, a previously bound ID will not be re-bound and src_addr is ++ * ignored. ++ */ ++static int resolve_prepare_src(struct rdma_id_private *id_priv, ++ struct sockaddr *src_addr, ++ const struct sockaddr *dst_addr) ++{ ++ int ret; ++ ++ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { ++ /* For a well behaved ULP state will be RDMA_CM_IDLE */ ++ ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); ++ if (ret) ++ return ret; ++ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, ++ RDMA_CM_ADDR_QUERY))) ++ return -EINVAL; ++ ++ } ++ ++ if (cma_family(id_priv) != dst_addr->sa_family) { ++ ret = -EINVAL; ++ goto err_state; ++ } ++ return 0; ++ ++err_state: ++ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); ++ return ret; ++} ++ ++int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, ++ const struct sockaddr *dst_addr, unsigned long timeout_ms) ++{ ++ struct rdma_id_private *id_priv = ++ container_of(id, struct rdma_id_private, id); ++ int ret; ++ ++ ret = resolve_prepare_src(id_priv, src_addr, dst_addr); ++ if (ret) ++ return ret; ++ ++ if (cma_any_addr(dst_addr)) { ++ ret = cma_resolve_loopback(id_priv); ++ } else { ++ if (dst_addr->sa_family == AF_IB) { ++ ret = cma_resolve_ib_addr(id_priv); ++ } else { ++ /* ++ * The FSM can return back to RDMA_CM_ADDR_BOUND after ++ * rdma_resolve_ip() is called, eg through the error ++ * path in addr_handler(). If this happens the existing ++ * request must be canceled before issuing a new one. ++ * Since canceling a request is a bit slow and this ++ * oddball path is rare, keep track once a request has ++ * been issued. The track turns out to be a permanent ++ * state since this is the only cancel as it is ++ * immediately before rdma_resolve_ip(). ++ */ ++ if (id_priv->used_resolve_ip) ++ rdma_addr_cancel(&id->route.addr.dev_addr); ++ else ++ id_priv->used_resolve_ip = 1; ++ ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, ++ &id->route.addr.dev_addr, ++ timeout_ms, addr_handler, ++ false, id_priv); ++ } ++ } ++ if (ret) ++ goto err; ++ ++ return 0; ++err: ++ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); ++ return ret; ++} ++EXPORT_SYMBOL(rdma_resolve_addr); ++ ++int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) ++{ ++ struct rdma_id_private *id_priv = ++ container_of(id, struct rdma_id_private, id); ++ ++ return rdma_bind_addr_dst(id_priv, addr, cma_dst_addr(id_priv)); ++} + EXPORT_SYMBOL(rdma_bind_addr); + + static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) diff --git a/queue-6.1/rdma-core-update-cma-destination-address-on-rdma_resolve_addr.patch b/queue-6.1/rdma-core-update-cma-destination-address-on-rdma_resolve_addr.patch new file mode 100644 index 00000000000..283325df8f6 --- /dev/null +++ b/queue-6.1/rdma-core-update-cma-destination-address-on-rdma_resolve_addr.patch @@ -0,0 +1,53 @@ +From 0e15863015d97c1ee2cc29d599abcc7fa2dc3e95 Mon Sep 17 00:00:00 2001 +From: Shiraz Saleem +Date: Wed, 12 Jul 2023 18:41:33 -0500 +Subject: RDMA/core: Update CMA destination address on rdma_resolve_addr + +From: Shiraz Saleem + +commit 0e15863015d97c1ee2cc29d599abcc7fa2dc3e95 upstream. + +8d037973d48c ("RDMA/core: Refactor rdma_bind_addr") intoduces as regression +on irdma devices on certain tests which uses rdma CM, such as cmtime. + +No connections can be established with the MAD QP experiences a fatal +error on the active side. + +The cma destination address is not updated with the dst_addr when ULP +on active side calls rdma_bind_addr followed by rdma_resolve_addr. +The id_priv state is 'bound' in resolve_prepare_src and update is skipped. + +This leaves the dgid passed into irdma driver to create an Address Handle +(AH) for the MAD QP at 0. The create AH descriptor as well as the ARP cache +entry is invalid and HW throws an asynchronous events as result. + +[ 1207.656888] resolve_prepare_src caller: ucma_resolve_addr+0xff/0x170 [rdma_ucm] daddr=200.0.4.28 id_priv->state=7 +[....] +[ 1207.680362] ice 0000:07:00.1 rocep7s0f1: caller: irdma_create_ah+0x3e/0x70 [irdma] ah_id=0 arp_idx=0 dest_ip=0.0.0.0 +destMAC=00:00:64:ca:b7:52 ipvalid=1 raw=0000:0000:0000:0000:0000:ffff:0000:0000 +[ 1207.682077] ice 0000:07:00.1 rocep7s0f1: abnormal ae_id = 0x401 bool qp=1 qp_id = 1, ae_src=5 +[ 1207.691657] infiniband rocep7s0f1: Fatal error (1) on MAD QP (1) + +Fix this by updating the CMA destination address when the ULP calls +a resolve address with the CM state already bound. + +Fixes: 8d037973d48c ("RDMA/core: Refactor rdma_bind_addr") +Signed-off-by: Shiraz Saleem +Link: https://lore.kernel.org/r/20230712234133.1343-1-shiraz.saleem@intel.com +Signed-off-by: Leon Romanovsky +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/core/cma.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/infiniband/core/cma.c ++++ b/drivers/infiniband/core/cma.c +@@ -4060,6 +4060,8 @@ static int resolve_prepare_src(struct rd + RDMA_CM_ADDR_QUERY))) + return -EINVAL; + ++ } else { ++ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); + } + + if (cma_family(id_priv) != dst_addr->sa_family) { diff --git a/queue-6.1/series b/queue-6.1/series index 13aebcd9ab7..50ad2f186ad 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -105,3 +105,5 @@ selftests-mptcp-join-add-ss-mptcp-support-check.patch mptcp-fix-snd_wnd-initialization-for-passive-socket.patch mptcp-fix-double-free-on-socket-dismantle.patch mptcp-fix-possible-deadlock-in-subflow-diag.patch +rdma-core-refactor-rdma_bind_addr.patch +rdma-core-update-cma-destination-address-on-rdma_resolve_addr.patch -- 2.47.3