git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
RDMA/hns: Fix recv CQ and QP cache affinity
author: Chengchang Tang <tangchengchang@huawei.com>
Thu, 16 Oct 2025 11:40:48 +0000 (19:40 +0800)
committer: Leon Romanovsky <leon@kernel.org>
Mon, 27 Oct 2025 09:44:00 +0000 (05:44 -0400)
Currently the driver enforces affinity between the QP cache and the send
CQ cache, which helps improve send performance, but it doesn't set
affinity with the recv CQ cache, resulting in suboptimal receive
performance.

Use one CQ bank per context to ensure the affinity among QP, send CQ
and recv CQ. For kernel ULP, CQ bank is fixed to 0.

Fixes: 9e03dbea2b06 ("RDMA/hns: Fix CQ and QP cache affinity")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://patch.msgid.link/20251016114051.1963197-2-huangjunxian6@hisilicon.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
drivers/infiniband/hw/hns/hns_roce_cq.c
drivers/infiniband/hw/hns/hns_roce_device.h
drivers/infiniband/hw/hns/hns_roce_main.c

index 3a5c93c9fb3e66bde70f59752cf6781a86010888..6aa82fe9dd3dfbfd0d0d9b845e8e810c3b0d1eb8 100644 (file)
@@ -30,6 +30,7 @@
  * SOFTWARE.
  */
 
+#include <linux/pci.h>
 #include <rdma/ib_umem.h>
 #include <rdma/uverbs_ioctl.h>
 #include "hns_roce_device.h"
 #include "hns_roce_hem.h"
 #include "hns_roce_common.h"
 
+/*
+ * Drop one user context from the per-bank context counter of the CQ bank
+ * recorded in @uctx->cq_bank_id.
+ *
+ * Does nothing on hardware revisions older than HIP09. The counter is
+ * updated with cq_table->bank_mutex held.
+ */
+void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device);
+       struct hns_roce_cq_table *table;
+
+       if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+               table = &hr_dev->cq_table;
+
+               mutex_lock(&table->bank_mutex);
+               table->ctx_num[uctx->cq_bank_id] -= 1;
+               mutex_unlock(&table->bank_mutex);
+       }
+}
+
+/*
+ * Assign a CQ bank to a user context.
+ *
+ * Scans cq_table->ctx_num[] for the bank with the fewest contexts (the
+ * lowest index wins on ties), increments that bank's counter and stores
+ * the bank index in @uctx->cq_bank_id.
+ *
+ * Does nothing on hardware revisions older than HIP09; in that case
+ * @uctx->cq_bank_id is left untouched.
+ */
+void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device);
+       struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+       u32 least_load;
+       u8 bankid = 0;
+       u8 i;
+
+       if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09)
+               return;
+
+       mutex_lock(&cq_table->bank_mutex);
+       /*
+        * Read the bank 0 baseline only after taking bank_mutex: ctx_num[]
+        * is modified under this lock, so sampling it earlier could compare
+        * the other banks against a stale value.
+        */
+       least_load = cq_table->ctx_num[0];
+       for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) {
+               if (cq_table->ctx_num[i] < least_load) {
+                       least_load = cq_table->ctx_num[i];
+                       bankid = i;
+               }
+       }
+       cq_table->ctx_num[bankid]++;
+       mutex_unlock(&cq_table->bank_mutex);
+
+       uctx->cq_bank_id = bankid;
+}
+
 static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank)
 {
        u32 least_load = bank[0].inuse;
@@ -55,7 +93,21 @@ static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank)
        return bankid;
 }
 
-static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+/*
+ * Choose the CQ bank to allocate a new CQN from.
+ *
+ * On revisions older than HIP09 the least-loaded bank is used. On HIP09
+ * and later the bank recorded in the caller's user context is used, or
+ * bank 0 when there is no udata / user context.
+ */
+static u8 select_cq_bankid(struct hns_roce_dev *hr_dev,
+                          struct hns_roce_bank *bank, struct ib_udata *udata)
+{
+       struct hns_roce_ucontext *uctx;
+
+       if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09)
+               return get_least_load_bankid_for_cq(bank);
+
+       /* No udata: fall back to bank 0. */
+       if (!udata)
+               return 0;
+
+       uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext,
+                                        ibucontext);
+       if (!uctx)
+               return 0;
+
+       return uctx->cq_bank_id;
+}
+
+static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+                    struct ib_udata *udata)
 {
        struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
        struct hns_roce_bank *bank;
@@ -63,7 +115,7 @@ static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
        int id;
 
        mutex_lock(&cq_table->bank_mutex);
-       bankid = get_least_load_bankid_for_cq(cq_table->bank);
+       bankid = select_cq_bankid(hr_dev, cq_table->bank, udata);
        bank = &cq_table->bank[bankid];
 
        id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL);
@@ -396,7 +448,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
                goto err_cq_buf;
        }
 
-       ret = alloc_cqn(hr_dev, hr_cq);
+       ret = alloc_cqn(hr_dev, hr_cq, udata);
        if (ret) {
                ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret);
                goto err_cq_db;
index 78ee04a48a74a811bb7a951433ee79c4beb5c8a5..06832c0ac05561002509ed07dc1e58cfaef9a380 100644 (file)
@@ -217,6 +217,7 @@ struct hns_roce_ucontext {
        struct mutex            page_mutex;
        struct hns_user_mmap_entry *db_mmap_entry;
        u32                     config;
+       u8 cq_bank_id;
 };
 
 struct hns_roce_pd {
@@ -495,6 +496,7 @@ struct hns_roce_cq_table {
        struct hns_roce_hem_table       table;
        struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM];
        struct mutex                    bank_mutex;
+       u32 ctx_num[HNS_ROCE_CQ_BANK_NUM];
 };
 
 struct hns_roce_srq_table {
@@ -1305,5 +1307,7 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
                                size_t length,
                                enum hns_roce_mmap_type mmap_type);
 bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl);
+void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx);
+void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx);
 
 #endif /* _HNS_ROCE_DEVICE_H */
index d50f36f8a1107ec06873209f02c91274d163c698..f3607fe107a7f9c81c232ae06716465b17610383 100644 (file)
@@ -425,6 +425,8 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
        if (ret)
                goto error_fail_copy_to_udata;
 
+       hns_roce_get_cq_bankid_for_uctx(context);
+
        return 0;
 
 error_fail_copy_to_udata:
@@ -447,6 +449,8 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
        struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
        struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device);
 
+       hns_roce_put_cq_bankid_for_uctx(context);
+
        if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
            hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
                mutex_destroy(&context->page_mutex);