git.ipfire.org Git - thirdparty/linux.git/commitdiff
RDMA/hns: Fix missing flush CQE for DWQE
author: Chengchang Tang <tangchengchang@huawei.com>
Fri, 20 Dec 2024 05:52:49 +0000 (13:52 +0800)
committer: Leon Romanovsky <leon@kernel.org>
Mon, 23 Dec 2024 14:58:30 +0000 (09:58 -0500)
The flush CQE handler is not called in the DWQE path when the QP state
has already entered the error state, so any newly posted outstanding
WQEs are never flushed.

It leads to a hung task timeout when using NFS over RDMA:
    __switch_to+0x7c/0xd0
    __schedule+0x350/0x750
    schedule+0x50/0xf0
    schedule_timeout+0x2c8/0x340
    wait_for_common+0xf4/0x2b0
    wait_for_completion+0x20/0x40
    __ib_drain_sq+0x140/0x1d0 [ib_core]
    ib_drain_sq+0x98/0xb0 [ib_core]
    rpcrdma_xprt_disconnect+0x68/0x270 [rpcrdma]
    xprt_rdma_close+0x20/0x60 [rpcrdma]
    xprt_autoclose+0x64/0x1cc [sunrpc]
    process_one_work+0x1d8/0x4e0
    worker_thread+0x154/0x420
    kthread+0x108/0x150
    ret_from_fork+0x10/0x18

Fixes: 01584a5edcc4 ("RDMA/hns: Add support of direct wqe")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://patch.msgid.link/20241220055249.146943-5-huangjunxian6@hisilicon.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
drivers/infiniband/hw/hns/hns_roce_hw_v2.c

index d0469d27c63cbdc0df6d1e3c57ae63665f7dcfbb..0144e7210d05a1d3466caa0af2df04f0e6002dc6 100644 (file)
@@ -670,6 +670,10 @@ static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
 #define HNS_ROCE_SL_SHIFT 2
        struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
 
+       if (unlikely(qp->state == IB_QPS_ERR)) {
+               flush_cqe(hr_dev, qp);
+               return;
+       }
        /* All kinds of DirectWQE have the same header field layout */
        hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG);
        hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl);