From 2529aead51673814ebf464723626ac608b8635a5 Mon Sep 17 00:00:00 2001 From: Jacob Moroni Date: Tue, 20 Jan 2026 21:25:46 +0000 Subject: [PATCH] RDMA/irdma: Use CQ ID for CEQE context The hardware allows for an opaque CQ context field to be carried over into CEQEs for the CQ. Previously, a pointer to the CQ was used for this context. In the normal CQ destroy flow, the CEQ ring is scrubbed to remove any preexisting CEQEs for the CQ that may not have been processed yet so that the CQ structure is not dereferenced in the CEQ ISR after the CQ has been freed. However, in some cases, it is possible for a CEQE to be in flight in HW even after the CQ destroy command completion is received, so it could be missed during the scrub. To protect against this, we can take advantage of the CQ table that already exists and use the CQ ID for this context rather than a CQ pointer. Signed-off-by: Jacob Moroni Link: https://patch.msgid.link/20260120212546.1893076-2-jmoroni@google.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/ctrl.c | 62 ++++++++------------ drivers/infiniband/hw/irdma/hw.c | 88 +++++++++++++++++++++++++---- drivers/infiniband/hw/irdma/puda.c | 14 +++++ drivers/infiniband/hw/irdma/type.h | 6 +- drivers/infiniband/hw/irdma/utils.c | 3 +- drivers/infiniband/hw/irdma/verbs.c | 5 +- 6 files changed, 127 insertions(+), 51 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 022fcdfab3392..45c7433c96f36 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -2886,15 +2886,6 @@ static int irdma_sc_resume_qp(struct irdma_sc_cqp *cqp, struct irdma_sc_qp *qp, return 0; } -/** - * irdma_sc_cq_ack - acknowledge completion q - * @cq: cq struct - */ -static inline void irdma_sc_cq_ack(struct irdma_sc_cq *cq) -{ - writel(cq->cq_uk.cq_id, cq->cq_uk.cq_ack_db); -} - /** * irdma_sc_cq_init - initialize completion q * @cq: cq struct @@ -2956,7 +2947,7 @@ static int irdma_sc_cq_create(struct irdma_sc_cq *cq, u64 scratch, return -ENOMEM; set_64bit_val(wqe, 0, cq->cq_uk.cq_size); - set_64bit_val(wqe, 8, (uintptr_t)cq >> 1); + set_64bit_val(wqe, 8, cq->cq_uk.cq_id); set_64bit_val(wqe, 16, FIELD_PREP(IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD, cq->shadow_read_threshold)); set_64bit_val(wqe, 32, (cq->virtual_map ? 0 : cq->cq_pa)); @@ -3013,7 +3004,7 @@ int irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, bool post_sq) return -ENOMEM; set_64bit_val(wqe, 0, cq->cq_uk.cq_size); - set_64bit_val(wqe, 8, (uintptr_t)cq >> 1); + set_64bit_val(wqe, 8, cq->cq_uk.cq_id); set_64bit_val(wqe, 40, cq->shadow_area_pa); set_64bit_val(wqe, 48, (cq->virtual_map ? cq->first_pm_pbl_idx : 0)); @@ -3082,7 +3073,7 @@ static int irdma_sc_cq_modify(struct irdma_sc_cq *cq, return -ENOMEM; set_64bit_val(wqe, 0, info->cq_size); - set_64bit_val(wqe, 8, (uintptr_t)cq >> 1); + set_64bit_val(wqe, 8, cq->cq_uk.cq_id); set_64bit_val(wqe, 16, FIELD_PREP(IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD, info->shadow_read_threshold)); set_64bit_val(wqe, 32, info->cq_pa); @@ -4458,47 +4449,38 @@ int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq) * irdma_sc_process_ceq - process ceq * @dev: sc device struct * @ceq: ceq sc structure + * @cq_idx: Pointer to a CQ ID that will be populated. * * It is expected caller serializes this function with cleanup_ceqes() * because these functions manipulate the same ceq + * + * Return: True if cq_idx has been populated with a CQ ID. 
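+ * False if no valid CEQE is pending. CEQEs carrying an out-of-range
+ * CQ ID (including entries invalidated by irdma_sc_cleanup_ceqes())
+ * are consumed and skipped.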
*/ -void *irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq) +bool irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq, + u32 *cq_idx) { u64 temp; __le64 *ceqe; - struct irdma_sc_cq *cq = NULL; - struct irdma_sc_cq *temp_cq; u8 polarity; - u32 cq_idx; do { - cq_idx = 0; ceqe = IRDMA_GET_CURRENT_CEQ_ELEM(ceq); get_64bit_val(ceqe, 0, &temp); polarity = (u8)FIELD_GET(IRDMA_CEQE_VALID, temp); if (polarity != ceq->polarity) - return NULL; + return false; - temp_cq = (struct irdma_sc_cq *)(unsigned long)(temp << 1); - if (!temp_cq) { - cq_idx = IRDMA_INVALID_CQ_IDX; - IRDMA_RING_MOVE_TAIL(ceq->ceq_ring); - - if (!IRDMA_RING_CURRENT_TAIL(ceq->ceq_ring)) - ceq->polarity ^= 1; - continue; - } - - cq = temp_cq; + /* Truncate. Discard valid bit which is MSb of temp. */ + *cq_idx = temp; + if (*cq_idx >= dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt) + *cq_idx = IRDMA_INVALID_CQ_IDX; IRDMA_RING_MOVE_TAIL(ceq->ceq_ring); if (!IRDMA_RING_CURRENT_TAIL(ceq->ceq_ring)) ceq->polarity ^= 1; - } while (cq_idx == IRDMA_INVALID_CQ_IDX); + } while (*cq_idx == IRDMA_INVALID_CQ_IDX); - if (cq) - irdma_sc_cq_ack(cq); - return cq; + return true; } /** @@ -4512,10 +4494,10 @@ void *irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq) */ void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq) { - struct irdma_sc_cq *next_cq; u8 ceq_polarity = ceq->polarity; __le64 *ceqe; u8 polarity; + u32 cq_idx; u64 temp; int next; u32 i; @@ -4530,9 +4512,10 @@ void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq) if (polarity != ceq_polarity) return; - next_cq = (struct irdma_sc_cq *)(unsigned long)(temp << 1); - if (cq == next_cq) - set_64bit_val(ceqe, 0, temp & IRDMA_CEQE_VALID); + cq_idx = temp; + if (cq_idx == cq->cq_uk.cq_id) + set_64bit_val(ceqe, 0, (temp & IRDMA_CEQE_VALID) | + IRDMA_INVALID_CQ_IDX); next = IRDMA_RING_GET_NEXT_TAIL(ceq->ceq_ring, i); if (!next) @@ -4973,7 +4956,7 @@ int irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, bool post_sq) return -ENOMEM; set_64bit_val(wqe, 0, ccq->cq_uk.cq_size); - set_64bit_val(wqe, 8, (uintptr_t)ccq >> 1); + set_64bit_val(wqe, 8, ccq->cq_uk.cq_id); set_64bit_val(wqe, 40, ccq->shadow_area_pa); hdr = ccq->cq_uk.cq_id | @@ -6459,6 +6442,9 @@ int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev, int ret_code = 0; u8 db_size; + spin_lock_init(&dev->puda_cq_lock); + dev->ilq_cq = NULL; + dev->ieq_cq = NULL; INIT_LIST_HEAD(&dev->cqp_cmd_head); /* for CQP command backlog */ mutex_init(&dev->ws_mutex); dev->hmc_fn_id = info->hmc_fn_id; diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 5d418ef5cdcae..31c67b753fc0b 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -98,6 +98,74 @@ static void irdma_puda_ce_handler(struct irdma_pci_f *rf, irdma_sc_ccq_arm(cq); } +/** + * irdma_process_normal_ceqe - Handle a CEQE for a normal CQ. + * @rf: RDMA PCI function. + * @dev: iWARP device. + * @cq_idx: CQ ID. Must be in table bounds. + * + * Context: Atomic (CEQ lock must be held) + */ +static void irdma_process_normal_ceqe(struct irdma_pci_f *rf, + struct irdma_sc_dev *dev, u32 cq_idx) +{ + /* cq_idx bounds validated in irdma_sc_process_ceq. */ + struct irdma_cq *icq = READ_ONCE(rf->cq_table[cq_idx]); + struct irdma_sc_cq *cq; + + if (unlikely(!icq)) { + /* Should not happen since CEQ is scrubbed upon CQ delete. 
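+		 * However, a CEQE can still be in flight in HW after the
+		 * CQ destroy completion, so tolerate a cleared entry here.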
*/ + ibdev_warn_ratelimited(to_ibdev(dev), "Stale CEQE for CQ %u", + cq_idx); + return; + } + + cq = &icq->sc_cq; + + if (unlikely(cq->cq_type != IRDMA_CQ_TYPE_IWARP)) { + ibdev_warn_ratelimited(to_ibdev(dev), "Unexpected CQ type %u", + cq->cq_type); + return; + } + + writel(cq->cq_uk.cq_id, cq->cq_uk.cq_ack_db); + irdma_iwarp_ce_handler(cq); +} + +/** + * irdma_process_reserved_ceqe - Handle a CEQE for a reserved CQ. + * @rf: RDMA PCI function. + * @dev: iWARP device. + * @cq_idx: CQ ID. + * + * Context: Atomic + */ +static void irdma_process_reserved_ceqe(struct irdma_pci_f *rf, + struct irdma_sc_dev *dev, u32 cq_idx) +{ + struct irdma_sc_cq *cq; + + if (cq_idx == IRDMA_RSVD_CQ_ID_CQP) { + cq = &rf->ccq.sc_cq; + /* CQP CQ lifetime > CEQ. */ + writel(cq->cq_uk.cq_id, cq->cq_uk.cq_ack_db); + queue_work(rf->cqp_cmpl_wq, &rf->cqp_cmpl_work); + } else if (cq_idx == IRDMA_RSVD_CQ_ID_ILQ || + cq_idx == IRDMA_RSVD_CQ_ID_IEQ) { + scoped_guard(spinlock_irqsave, &dev->puda_cq_lock) { + cq = (cq_idx == IRDMA_RSVD_CQ_ID_ILQ) ? + dev->ilq_cq : dev->ieq_cq; + if (!cq) { + ibdev_warn_ratelimited(to_ibdev(dev), + "Stale ILQ/IEQ CEQE"); + return; + } + writel(cq->cq_uk.cq_id, cq->cq_uk.cq_ack_db); + irdma_puda_ce_handler(rf, cq); + } + } +} + /** * irdma_process_ceq - handle ceq for completions * @rf: RDMA PCI function @@ -107,28 +175,28 @@ static void irdma_process_ceq(struct irdma_pci_f *rf, struct irdma_ceq *ceq) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_sc_ceq *sc_ceq; - struct irdma_sc_cq *cq; unsigned long flags; + u32 cq_idx; sc_ceq = &ceq->sc_ceq; do { spin_lock_irqsave(&ceq->ce_lock, flags); - cq = irdma_sc_process_ceq(dev, sc_ceq); - if (!cq) { + + if (!irdma_sc_process_ceq(dev, sc_ceq, &cq_idx)) { spin_unlock_irqrestore(&ceq->ce_lock, flags); break; } - if (cq->cq_type == IRDMA_CQ_TYPE_IWARP) - irdma_iwarp_ce_handler(cq); + /* Normal CQs must be handled while holding CEQ lock. 
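+		 * This serializes the cq_table lookup and completion
+		 * handling against irdma_sc_cleanup_ceqes() on CQ destroy.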
*/ + if (likely(cq_idx > IRDMA_RSVD_CQ_ID_IEQ)) { + irdma_process_normal_ceqe(rf, dev, cq_idx); + spin_unlock_irqrestore(&ceq->ce_lock, flags); + continue; + } spin_unlock_irqrestore(&ceq->ce_lock, flags); - if (cq->cq_type == IRDMA_CQ_TYPE_CQP) - queue_work(rf->cqp_cmpl_wq, &rf->cqp_cmpl_work); - else if (cq->cq_type == IRDMA_CQ_TYPE_ILQ || - cq->cq_type == IRDMA_CQ_TYPE_IEQ) - irdma_puda_ce_handler(rf, cq); + irdma_process_reserved_ceqe(rf, dev, cq_idx); } while (1); } diff --git a/drivers/infiniband/hw/irdma/puda.c b/drivers/infiniband/hw/irdma/puda.c index cee47ddbd1b59..4f1a8c97faf1a 100644 --- a/drivers/infiniband/hw/irdma/puda.c +++ b/drivers/infiniband/hw/irdma/puda.c @@ -809,6 +809,13 @@ error: dma_free_coherent(dev->hw->device, rsrc->cqmem.size, rsrc->cqmem.va, rsrc->cqmem.pa); rsrc->cqmem.va = NULL; + } else { + scoped_guard(spinlock_irqsave, &dev->puda_cq_lock) { + if (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ) + dev->ilq_cq = cq; + else + dev->ieq_cq = cq; + } } return ret; @@ -856,6 +863,13 @@ static void irdma_puda_free_cq(struct irdma_puda_rsrc *rsrc) struct irdma_ccq_cqe_info compl_info; struct irdma_sc_dev *dev = rsrc->dev; + scoped_guard(spinlock_irqsave, &dev->puda_cq_lock) { + if (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ) + dev->ilq_cq = NULL; + else + dev->ieq_cq = NULL; + } + if (rsrc->dev->ceq_valid) { irdma_cqp_cq_destroy_cmd(dev, &rsrc->cq); return; diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 3de9240b727f6..da8c54d1f035a 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -707,6 +707,9 @@ struct irdma_sc_dev { struct irdma_sc_aeq *aeq; struct irdma_sc_ceq *ceq[IRDMA_CEQ_MAX_COUNT]; struct irdma_sc_cq *ccq; + spinlock_t puda_cq_lock; + struct irdma_sc_cq *ilq_cq; + struct irdma_sc_cq *ieq_cq; const struct irdma_irq_ops *irq_ops; struct irdma_qos qos[IRDMA_MAX_USER_PRIORITY]; struct irdma_hmc_fpm_misc hmc_fpm_misc; @@ -1344,7 +1347,8 @@ int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq); int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, struct irdma_ceq_init_info *info); void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq); -void *irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq); +bool irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq, + u32 *cq_idx); int irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, struct irdma_aeq_init_info *info); diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 3bac7c2588aea..6a385cea6b2c7 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -829,7 +829,8 @@ void irdma_cq_rem_ref(struct ib_cq *ibcq) return; } - iwdev->rf->cq_table[iwcq->cq_num] = NULL; + /* May be asynchronously sampled by CEQ ISR without holding tbl lock. */ + WRITE_ONCE(iwdev->rf->cq_table[iwcq->cq_num], NULL); spin_unlock_irqrestore(&iwdev->rf->cqtable_lock, flags); complete(&iwcq->free_cq); } diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 1f1efd4971a9d..cf8d191505740 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2669,9 +2669,12 @@ static int irdma_create_cq(struct ib_cq *ibcq, goto cq_destroy; } } - rf->cq_table[cq_num] = iwcq; + init_completion(&iwcq->free_cq); + /* Populate table entry after CQ is fully created. 
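+	 * The release pairs with the READ_ONCE() lookup in the CEQ ISR so
+	 * a concurrent CEQE never observes a partially initialized CQ.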
*/ + smp_store_release(&rf->cq_table[cq_num], iwcq); + return 0; cq_destroy: irdma_cq_wq_destroy(rf, cq); -- 2.47.3
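
For reviewers who want to see the lookup pattern in isolation, below is a
minimal userspace sketch of the ID-plus-table scheme this patch adopts. All
names are illustrative (this is not irdma code), and C11 atomics stand in
for the kernel's smp_store_release()/READ_ONCE(): the create path publishes
a fully initialized object with a release store, teardown clears the slot,
and the event handler bounds-checks the ID and simply drops events whose
slot is empty.

/*
 * Illustrative userspace sketch, not irdma code: events carry a table
 * index instead of a raw pointer, so a stale event that arrives after
 * teardown resolves to NULL rather than a dangling pointer.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define TABLE_SIZE 8

struct obj {
	unsigned int id;
	int payload;
};

static _Atomic(struct obj *) table[TABLE_SIZE];

/* Create path: publish only once the object is fully initialized. */
static void publish(unsigned int idx, struct obj *o)
{
	atomic_store_explicit(&table[idx], o, memory_order_release);
}

/* Destroy path: clear the slot; in-flight events become harmless. */
static void retire(unsigned int idx)
{
	atomic_store_explicit(&table[idx], NULL, memory_order_relaxed);
}

/* ISR analogue: bounds-check the index, then resolve it via the table. */
static void handle_event(unsigned int idx)
{
	struct obj *o;

	if (idx >= TABLE_SIZE)
		return; /* invalid index: drop the event */

	o = atomic_load_explicit(&table[idx], memory_order_acquire);
	if (!o) {
		fprintf(stderr, "stale event for id %u, dropped\n", idx);
		return;
	}
	printf("event for id %u, payload %d\n", o->id, o->payload);
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	if (!o)
		return 1;
	o->id = 3;
	o->payload = 42;
	publish(o->id, o);
	handle_event(3);	/* normal completion */
	retire(o->id);
	handle_event(3);	/* event in flight past destroy: dropped */
	free(o);
	return 0;
}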