RDMA/hns: Fix flush cqe error when racing with destroy qp
author     wenglianfa <wenglianfa@huawei.com>
           Thu, 24 Oct 2024 12:39:57 +0000 (20:39 +0800)
committer  Leon Romanovsky <leon@kernel.org>
           Wed, 30 Oct 2024 12:13:54 +0000 (14:13 +0200)
The QP needs to be modified to IB_QPS_ERR to trigger a HW flush CQE.
But when this process races with destroy QP, the destroy-QP flow may
modify the QP to IB_QPS_RESET first. In that case the flush CQE will
fail, since it is invalid to modify a QP from IB_QPS_RESET to
IB_QPS_ERR.

Add a spinlock and a bit flag to ensure that any pending flush-CQE
work completes first and that no new work can be queued afterwards.
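For illustration, here is a minimal userspace sketch of the lock-and-flag
ordering this patch relies on. The names qp_ctx, stop_flush, try_queue_flush
and stop_and_drain_flush are hypothetical; the real driver uses
hr_qp->flush_lock, HNS_ROCE_STOP_FLUSH_FLAG, init_flush_work() and
flush_work().

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct qp_ctx {
	pthread_mutex_t flush_lock;  /* stands in for hr_qp->flush_lock */
	bool            stop_flush;  /* stands in for HNS_ROCE_STOP_FLUSH_FLAG */
};

/* Flush path: queue work only if destroy has not started yet. */
static bool try_queue_flush(struct qp_ctx *qp)
{
	bool queued = false;

	pthread_mutex_lock(&qp->flush_lock);
	if (!qp->stop_flush) {
		/* the driver would refcount_inc() and queue_work() here */
		queued = true;
	}
	pthread_mutex_unlock(&qp->flush_lock);
	return queued;
}

/* Destroy path: forbid new flush work, then wait for already-queued work. */
static void stop_and_drain_flush(struct qp_ctx *qp)
{
	pthread_mutex_lock(&qp->flush_lock);
	qp->stop_flush = true;
	pthread_mutex_unlock(&qp->flush_lock);
	/* the driver calls flush_work(&hr_qp->flush_work.work) here */
}

int main(void)
{
	struct qp_ctx qp = { PTHREAD_MUTEX_INITIALIZER, false };

	printf("before destroy: queued=%d\n", try_queue_flush(&qp));
	stop_and_drain_flush(&qp);
	printf("after destroy:  queued=%d\n", try_queue_flush(&qp));
	return 0;
}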

Fixes: ffd541d45726 ("RDMA/hns: Add the workqueue framework for flush cqe handler")
Signed-off-by: wenglianfa <wenglianfa@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://patch.msgid.link/20241024124000.2931869-3-huangjunxian6@hisilicon.com
Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
drivers/infiniband/hw/hns/hns_roce_device.h
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/hns/hns_roce_qp.c

drivers/infiniband/hw/hns/hns_roce_device.h
index 73c78005901e6650fca0254fec3dedf56662d9cd..9b51d5a1533f5a91814c3978c66e021940bb0db9 100644
@@ -593,6 +593,7 @@ struct hns_roce_dev;
 
 enum {
        HNS_ROCE_FLUSH_FLAG = 0,
+       HNS_ROCE_STOP_FLUSH_FLAG = 1,
 };
 
 struct hns_roce_work {
@@ -656,6 +657,7 @@ struct hns_roce_qp {
        enum hns_roce_cong_type cong_type;
        u8                      tc_mode;
        u8                      priority;
+       spinlock_t flush_lock;
 };
 
 struct hns_roce_ib_iboe {
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index bbdeb02102e8780769b5ac31b65df3d8ebb02987..4c3bc1f6a183caf6d4f2bb430958e3bdcc047865 100644
@@ -5598,8 +5598,15 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 {
        struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
        struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+       unsigned long flags;
        int ret;
 
+       /* Make sure flush_cqe() is completed */
+       spin_lock_irqsave(&hr_qp->flush_lock, flags);
+       set_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag);
+       spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
+       flush_work(&hr_qp->flush_work.work);
+
        ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata);
        if (ret)
                ibdev_err(&hr_dev->ib_dev,
drivers/infiniband/hw/hns/hns_roce_qp.c
index dcaa370d4a2650cf5de082bb0c91516a8e3a7079..2ad03ecdbf8ec7de7b50031dd950e24d98a50563 100644
@@ -90,11 +90,18 @@ static void flush_work_handle(struct work_struct *work)
 void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
 {
        struct hns_roce_work *flush_work = &hr_qp->flush_work;
+       unsigned long flags;
+
+       spin_lock_irqsave(&hr_qp->flush_lock, flags);
+       /* Exit directly after destroy_qp() */
+       if (test_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag)) {
+               spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
+               return;
+       }
 
-       flush_work->hr_dev = hr_dev;
-       INIT_WORK(&flush_work->work, flush_work_handle);
        refcount_inc(&hr_qp->refcount);
        queue_work(hr_dev->irq_workq, &flush_work->work);
+       spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
 }
 
 void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp)
@@ -1140,6 +1147,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
                                     struct ib_udata *udata,
                                     struct hns_roce_qp *hr_qp)
 {
+       struct hns_roce_work *flush_work = &hr_qp->flush_work;
        struct hns_roce_ib_create_qp_resp resp = {};
        struct ib_device *ibdev = &hr_dev->ib_dev;
        struct hns_roce_ib_create_qp ucmd = {};
@@ -1148,9 +1156,12 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
        mutex_init(&hr_qp->mutex);
        spin_lock_init(&hr_qp->sq.lock);
        spin_lock_init(&hr_qp->rq.lock);
+       spin_lock_init(&hr_qp->flush_lock);
 
        hr_qp->state = IB_QPS_RESET;
        hr_qp->flush_flag = 0;
+       flush_work->hr_dev = hr_dev;
+       INIT_WORK(&flush_work->work, flush_work_handle);
 
        if (init_attr->create_flags)
                return -EOPNOTSUPP;