]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
RDMA/hns: Fix soft lockup under heavy CEQE load
authorJunxian Huang <huangjunxian6@hisilicon.com>
Wed, 10 Jul 2024 13:36:59 +0000 (21:36 +0800)
committerLeon Romanovsky <leon@kernel.org>
Thu, 11 Jul 2024 10:25:11 +0000 (13:25 +0300)
CEQEs are handled in interrupt handler currently. This may cause the
CPU core staying in interrupt context too long and lead to soft lockup
under heavy load.

Handle CEQEs in BH workqueue and set an upper limit for the number of
CEQE handled by a single call of work handler.

Fixes: a5073d6054f7 ("RDMA/hns: Add eq support of hip08")
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://lore.kernel.org/r/20240710133705.896445-3-huangjunxian6@hisilicon.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
drivers/infiniband/hw/hns/hns_roce_device.h
drivers/infiniband/hw/hns/hns_roce_hw_v2.c

index bf7f966c89fcad7bb23694886666e8269339cf90..3cc5a4ff404a71aea1bc47eb11a2d20015306b22 100644 (file)
@@ -717,6 +717,7 @@ struct hns_roce_eq {
        int                             shift;
        int                             event_type;
        int                             sub_type;
+       struct work_struct              work;
 };
 
 struct hns_roce_eq_table {
index eb6052ee89383f1b130457354626b327aee81422..2f16554c96befdad377186cc4334be37cb5f2180 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
+#include <linux/workqueue.h>
 #include <net/addrconf.h>
 #include <rdma/ib_addr.h>
 #include <rdma/ib_cache.h>
@@ -6140,33 +6141,11 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
                !!(eq->cons_index & eq->entries)) ? ceqe : NULL;
 }
 
-static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
-                                      struct hns_roce_eq *eq)
+static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_eq *eq)
 {
-       struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
-       irqreturn_t ceqe_found = IRQ_NONE;
-       u32 cqn;
-
-       while (ceqe) {
-               /* Make sure we read CEQ entry after we have checked the
-                * ownership bit
-                */
-               dma_rmb();
-
-               cqn = hr_reg_read(ceqe, CEQE_CQN);
-
-               hns_roce_cq_completion(hr_dev, cqn);
-
-               ++eq->cons_index;
-               ceqe_found = IRQ_HANDLED;
-               atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]);
-
-               ceqe = next_ceqe_sw_v2(eq);
-       }
+       queue_work(system_bh_wq, &eq->work);
 
-       update_eq_db(eq);
-
-       return IRQ_RETVAL(ceqe_found);
+       return IRQ_HANDLED;
 }
 
 static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
@@ -6177,7 +6156,7 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
 
        if (eq->type_flag == HNS_ROCE_CEQ)
                /* Completion event interrupt */
-               int_work = hns_roce_v2_ceq_int(hr_dev, eq);
+               int_work = hns_roce_v2_ceq_int(eq);
        else
                /* Asynchronous event interrupt */
                int_work = hns_roce_v2_aeq_int(hr_dev, eq);
@@ -6545,6 +6524,34 @@ free_cmd_mbox:
        return ret;
 }
 
+static void hns_roce_ceq_work(struct work_struct *work)
+{
+       struct hns_roce_eq *eq = from_work(eq, work, work);
+       struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
+       struct hns_roce_dev *hr_dev = eq->hr_dev;
+       int ceqe_num = 0;
+       u32 cqn;
+
+       while (ceqe && ceqe_num < hr_dev->caps.ceqe_depth) {
+               /* Make sure we read CEQ entry after we have checked the
+                * ownership bit
+                */
+               dma_rmb();
+
+               cqn = hr_reg_read(ceqe, CEQE_CQN);
+
+               hns_roce_cq_completion(hr_dev, cqn);
+
+               ++eq->cons_index;
+               ++ceqe_num;
+               atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]);
+
+               ceqe = next_ceqe_sw_v2(eq);
+       }
+
+       update_eq_db(eq);
+}
+
 static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
                                  int comp_num, int aeq_num, int other_num)
 {
@@ -6576,21 +6583,24 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
                         j - other_num - aeq_num);
 
        for (j = 0; j < irq_num; j++) {
-               if (j < other_num)
+               if (j < other_num) {
                        ret = request_irq(hr_dev->irq[j],
                                          hns_roce_v2_msix_interrupt_abn,
                                          0, hr_dev->irq_names[j], hr_dev);
-
-               else if (j < (other_num + comp_num))
+               } else if (j < (other_num + comp_num)) {
+                       INIT_WORK(&eq_table->eq[j - other_num].work,
+                                 hns_roce_ceq_work);
                        ret = request_irq(eq_table->eq[j - other_num].irq,
                                          hns_roce_v2_msix_interrupt_eq,
                                          0, hr_dev->irq_names[j + aeq_num],
                                          &eq_table->eq[j - other_num]);
-               else
+               } else {
                        ret = request_irq(eq_table->eq[j - other_num].irq,
                                          hns_roce_v2_msix_interrupt_eq,
                                          0, hr_dev->irq_names[j - comp_num],
                                          &eq_table->eq[j - other_num]);
+               }
+
                if (ret) {
                        dev_err(hr_dev->dev, "request irq error!\n");
                        goto err_request_failed;
@@ -6600,12 +6610,16 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
        return 0;
 
 err_request_failed:
-       for (j -= 1; j >= 0; j--)
-               if (j < other_num)
+       for (j -= 1; j >= 0; j--) {
+               if (j < other_num) {
                        free_irq(hr_dev->irq[j], hr_dev);
-               else
-                       free_irq(eq_table->eq[j - other_num].irq,
-                                &eq_table->eq[j - other_num]);
+                       continue;
+               }
+               free_irq(eq_table->eq[j - other_num].irq,
+                        &eq_table->eq[j - other_num]);
+               if (j < other_num + comp_num)
+                       cancel_work_sync(&eq_table->eq[j - other_num].work);
+       }
 
 err_kzalloc_failed:
        for (i -= 1; i >= 0; i--)
@@ -6626,8 +6640,11 @@ static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev)
        for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
                free_irq(hr_dev->irq[i], hr_dev);
 
-       for (i = 0; i < eq_num; i++)
+       for (i = 0; i < eq_num; i++) {
                free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]);
+               if (i < hr_dev->caps.num_comp_vectors)
+                       cancel_work_sync(&hr_dev->eq_table.eq[i].work);
+       }
 
        for (i = 0; i < irq_num; i++)
                kfree(hr_dev->irq_names[i]);