git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
RDMA/rxe: Fix rnr retry behavior
author: Bob Pearson <rpearsonhpe@gmail.com>
Thu, 30 Jun 2022 19:04:22 +0000 (14:04 -0500)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 17 Aug 2022 12:41:52 +0000 (14:41 +0200)
[ Upstream commit 445fd4f4fb76d513de6b05b08b3a4d0bb980fc80 ]

Currently, in the completer tasklet, when the retransmit timer or the rnr
timer fires, the same flag (qp->req.need_retry) is set, so that if either
timer fires it will attempt to perform a retry flow on the send queue.  This
has the effect of responding to an RNR NAK at the first retransmit timer
event, which might not allow the requested rnr timeout to elapse.

This patch adds a new flag (qp->req.wait_for_rnr_timer) which, if set,
prevents a retry flow until the rnr nak timer fires.

This patch fixes rnr retry errors which can be observed by running the
pyverbs test_rdmacm_async_traffic_external_qp multiple times. With this
patch applied they do not occur.

Link: https://lore.kernel.org/linux-rdma/a8287823-1408-4273-bc22-99a0678db640@gmail.com/
Link: https://lore.kernel.org/linux-rdma/2bafda9e-2bb6-186d-12a1-179e8f6a2678@talpey.com/
Fixes: 8700e3e7c485 ("Soft RoCE driver")
Link: https://lore.kernel.org/r/20220630190425.2251-6-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/infiniband/sw/rxe/rxe_comp.c
drivers/infiniband/sw/rxe/rxe_qp.c
drivers/infiniband/sw/rxe/rxe_req.c
drivers/infiniband/sw/rxe/rxe_verbs.h

index 138b3e7d3a5f0aadaefd2dbb0970b11a756b1456..ec671e171f13970d85a5bf93295b123d1fe4e8f3 100644 (file)
@@ -114,6 +114,8 @@ void retransmit_timer(struct timer_list *t)
 {
        struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
 
+       pr_debug("%s: fired for qp#%d\n", __func__, qp->elem.index);
+
        if (qp->valid) {
                qp->comp.timeout = 1;
                rxe_run_task(&qp->comp.task, 1);
@@ -729,11 +731,15 @@ int rxe_completer(void *arg)
                        break;
 
                case COMPST_RNR_RETRY:
+                       /* we come here if we received an RNR NAK */
                        if (qp->comp.rnr_retry > 0) {
                                if (qp->comp.rnr_retry != 7)
                                        qp->comp.rnr_retry--;
 
-                               qp->req.need_retry = 1;
+                               /* don't start a retry flow until the
+                                * rnr timer has fired
+                                */
+                               qp->req.wait_for_rnr_timer = 1;
                                pr_debug("qp#%d set rnr nak timer\n",
                                         qp_num(qp));
                                mod_timer(&qp->rnr_nak_timer,
index 62acf890af6c1c92ced7cf2ac344a3667bf758dc..7d0c4432d3fdf48749db703ecd97bda644eb22f5 100644 (file)
@@ -513,6 +513,7 @@ static void rxe_qp_reset(struct rxe_qp *qp)
        atomic_set(&qp->ssn, 0);
        qp->req.opcode = -1;
        qp->req.need_retry = 0;
+       qp->req.wait_for_rnr_timer = 0;
        qp->req.noack_pkts = 0;
        qp->resp.msn = 0;
        qp->resp.opcode = -1;
index d574c47099b85cda047bfdac4449ec5c34ada4fb..90669b3c56afbb43b18fc22d847062d51f5658b5 100644 (file)
@@ -103,7 +103,11 @@ void rnr_nak_timer(struct timer_list *t)
 {
        struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);
 
-       pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp));
+       pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp));
+
+       /* request a send queue retry */
+       qp->req.need_retry = 1;
+       qp->req.wait_for_rnr_timer = 0;
        rxe_run_task(&qp->req.task, 1);
 }
 
@@ -626,10 +630,17 @@ next_wqe:
                qp->req.need_rd_atomic = 0;
                qp->req.wait_psn = 0;
                qp->req.need_retry = 0;
+               qp->req.wait_for_rnr_timer = 0;
                goto exit;
        }
 
-       if (unlikely(qp->req.need_retry)) {
+       /* we come here if the retransmit timer has fired
+        * or if the rnr timer has fired. If the retransmit
+        * timer fires while we are processing an RNR NAK wait
+        * until the rnr timer has fired before starting the
+        * retry flow
+        */
+       if (unlikely(qp->req.need_retry && !qp->req.wait_for_rnr_timer)) {
                req_retry(qp);
                qp->req.need_retry = 0;
        }
index e7eff1ca75e9b0ec1c2bbc7f5694553df8475a7d..33e8d0547553f6c49194926c4f1261726ba66027 100644 (file)
@@ -123,6 +123,7 @@ struct rxe_req_info {
        int                     need_rd_atomic;
        int                     wait_psn;
        int                     need_retry;
+       int                     wait_for_rnr_timer;
        int                     noack_pkts;
        struct rxe_task         task;
 };