]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
RDMA/rxe: Fix rnr retry behavior
author Bob Pearson <rpearsonhpe@gmail.com>
Thu, 30 Jun 2022 19:04:22 +0000 (14:04 -0500)
committer Jason Gunthorpe <jgg@nvidia.com>
Fri, 22 Jul 2022 20:43:00 +0000 (17:43 -0300)
Currently, in the completer tasklet, the same flag (qp->req.need_retry) is
set whether the retransmit timer or the rnr timer fires, so that if either
timer fires it will attempt to perform a retry flow on the send queue.  This
has the effect of responding to an RNR NAK at the first retransmit timer
event, which might not allow the requested rnr timeout to elapse.

This patch adds a new flag (qp->req.wait_for_rnr_timer) which, if set,
prevents a retry flow until the rnr nak timer fires.

This patch fixes rnr retry errors which can be observed by running the
pyverbs test_rdmacm_async_traffic_external_qp multiple times. With this
patch applied they do not occur.

Link: https://lore.kernel.org/linux-rdma/a8287823-1408-4273-bc22-99a0678db640@gmail.com/
Link: https://lore.kernel.org/linux-rdma/2bafda9e-2bb6-186d-12a1-179e8f6a2678@talpey.com/
Fixes: 8700e3e7c485 ("Soft RoCE driver")
Link: https://lore.kernel.org/r/20220630190425.2251-6-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/sw/rxe/rxe_comp.c
drivers/infiniband/sw/rxe/rxe_qp.c
drivers/infiniband/sw/rxe/rxe_req.c
drivers/infiniband/sw/rxe/rxe_verbs.h

index da3a398053b8e60316265626af4a550469bba757..4fc31bb7eee6d8239955381acfcb5782431b1b8d 100644 (file)
@@ -114,6 +114,8 @@ void retransmit_timer(struct timer_list *t)
 {
        struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
 
+       pr_debug("%s: fired for qp#%d\n", __func__, qp->elem.index);
+
        if (qp->valid) {
                qp->comp.timeout = 1;
                rxe_run_task(&qp->comp.task, 1);
@@ -730,11 +732,15 @@ int rxe_completer(void *arg)
                        break;
 
                case COMPST_RNR_RETRY:
+                       /* we come here if we received an RNR NAK */
                        if (qp->comp.rnr_retry > 0) {
                                if (qp->comp.rnr_retry != 7)
                                        qp->comp.rnr_retry--;
 
-                               qp->req.need_retry = 1;
+                               /* don't start a retry flow until the
+                                * rnr timer has fired
+                                */
+                               qp->req.wait_for_rnr_timer = 1;
                                pr_debug("qp#%d set rnr nak timer\n",
                                         qp_num(qp));
                                mod_timer(&qp->rnr_nak_timer,
index 65d75eea460f761f4eafca7d9e54ce22b9d0f770..eef91b8cb4ed94fb9a7e52a90d15280044b13c8d 100644 (file)
@@ -505,6 +505,7 @@ static void rxe_qp_reset(struct rxe_qp *qp)
        atomic_set(&qp->ssn, 0);
        qp->req.opcode = -1;
        qp->req.need_retry = 0;
+       qp->req.wait_for_rnr_timer = 0;
        qp->req.noack_pkts = 0;
        qp->resp.msn = 0;
        qp->resp.opcode = -1;
index 12a4a47ed96917e3bf94a91b584b73a5b22e3a25..f33699a094e0143b694a7119dffa5d9aeefb4a28 100644 (file)
@@ -100,7 +100,11 @@ void rnr_nak_timer(struct timer_list *t)
 {
        struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);
 
-       pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp));
+       pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp));
+
+       /* request a send queue retry */
+       qp->req.need_retry = 1;
+       qp->req.wait_for_rnr_timer = 0;
        rxe_run_task(&qp->req.task, 1);
 }
 
@@ -641,10 +645,17 @@ next_wqe:
                qp->req.need_rd_atomic = 0;
                qp->req.wait_psn = 0;
                qp->req.need_retry = 0;
+               qp->req.wait_for_rnr_timer = 0;
                goto exit;
        }
 
-       if (unlikely(qp->req.need_retry)) {
+       /* we come here if the retransmit timer has fired
+        * or if the rnr timer has fired. If the retransmit
+        * timer fires while we are processing an RNR NAK wait
+        * until the rnr timer has fired before starting the
+        * retry flow
+        */
+       if (unlikely(qp->req.need_retry && !qp->req.wait_for_rnr_timer)) {
                req_retry(qp);
                qp->req.need_retry = 0;
        }
index 628e40c1714b74a508a8e8190d361091e8e3ffde..9fd5861f28fbd49a5f4bbebda7e5c9c474b1b436 100644 (file)
@@ -123,6 +123,7 @@ struct rxe_req_info {
        int                     need_rd_atomic;
        int                     wait_psn;
        int                     need_retry;
+       int                     wait_for_rnr_timer;
        int                     noack_pkts;
        struct rxe_task         task;
 };