git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
RDMA/irdma: Extend CQE Error and Flush Handling for GEN3 Devices
author Shiraz Saleem <shiraz.saleem@intel.com>
Wed, 27 Aug 2025 15:25:44 +0000 (10:25 -0500)
committer Leon Romanovsky <leon@kernel.org>
Thu, 18 Sep 2025 08:48:46 +0000 (04:48 -0400)
Enhance the CQE error and flush handling specific to GEN3 devices.
Unlike GEN1/2 devices, which depend on software to generate completions
in error, GEN3 devices leverage firmware to generate CQEs in error for
all WQEs posted after a QP moves to an error state.

Key changes include:
- Updating the CQ poll logic to properly advance the CQ head in the
event of a flush CQE.
- Updating the flush logic for GEN3 to pass error WQE idx
for SQ on an AE to flush out unprocessed WQEs in error.
- Isolating the decoding of AE to flush codes into a separate routine
irdma_ae_to_qp_err_code. This routine can now be leveraged to
flush error CQEs on an AE and when error CQE is received for SRQ.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Link: https://patch.msgid.link/20250827152545.2056-16-tatyana.e.nikolova@intel.com
Tested-by: Jacob Moroni <jmoroni@google.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
drivers/infiniband/hw/irdma/ctrl.c
drivers/infiniband/hw/irdma/defs.h
drivers/infiniband/hw/irdma/hw.c
drivers/infiniband/hw/irdma/type.h
drivers/infiniband/hw/irdma/uk.c
drivers/infiniband/hw/irdma/user.h
drivers/infiniband/hw/irdma/verbs.c

index f2a19a856975154e55207f9628cac3ffddf3721b..4ef1c29032f771ff567913ace5d18a7ce2e0c9e2 100644 (file)
@@ -2674,6 +2674,12 @@ int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
                info->ae_code | FIELD_PREP(IRDMA_CQPSQ_FWQE_AESOURCE,
                                           info->ae_src) : 0;
        set_64bit_val(wqe, 8, temp);
+       if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+               set_64bit_val(wqe, 40,
+                             FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_SQ_IDX, info->err_sq_idx));
+               set_64bit_val(wqe, 48,
+                             FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_RQ_IDX, info->err_rq_idx));
+       }
 
        hdr = qp->qp_uk.qp_id |
              FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_FLUSH_WQES) |
@@ -2682,6 +2688,9 @@ int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
              FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHSQ, flush_sq) |
              FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHRQ, flush_rq) |
              FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+       if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+               hdr |= FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_SQ_IDX_VALID, info->err_sq_idx_valid) |
+                      FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_RQ_IDX_VALID, info->err_rq_idx_valid);
        dma_wmb(); /* make sure WQE is written before valid bit is set */
 
        set_64bit_val(wqe, 24, hdr);
index 3b3680816a658765a8d660df96fc6efe89f5c4ca..983b22d7ae233fc12ba4f518b5c5d2f54840a5a2 100644 (file)
@@ -301,107 +301,6 @@ enum irdma_cqp_op_type {
 #define IRDMA_CQP_OP_GATHER_STATS                      0x2e
 #define IRDMA_CQP_OP_UP_MAP                            0x2f
 
-/* Async Events codes */
-#define IRDMA_AE_AMP_UNALLOCATED_STAG                                  0x0102
-#define IRDMA_AE_AMP_INVALID_STAG                                      0x0103
-#define IRDMA_AE_AMP_BAD_QP                                            0x0104
-#define IRDMA_AE_AMP_BAD_PD                                            0x0105
-#define IRDMA_AE_AMP_BAD_STAG_KEY                                      0x0106
-#define IRDMA_AE_AMP_BAD_STAG_INDEX                                    0x0107
-#define IRDMA_AE_AMP_BOUNDS_VIOLATION                                  0x0108
-#define IRDMA_AE_AMP_RIGHTS_VIOLATION                                  0x0109
-#define IRDMA_AE_AMP_TO_WRAP                                           0x010a
-#define IRDMA_AE_AMP_FASTREG_VALID_STAG                                        0x010c
-#define IRDMA_AE_AMP_FASTREG_MW_STAG                                   0x010d
-#define IRDMA_AE_AMP_FASTREG_INVALID_RIGHTS                            0x010e
-#define IRDMA_AE_AMP_FASTREG_INVALID_LENGTH                            0x0110
-#define IRDMA_AE_AMP_INVALIDATE_SHARED                                 0x0111
-#define IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS                        0x0112
-#define IRDMA_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS                  0x0113
-#define IRDMA_AE_AMP_MWBIND_VALID_STAG                                 0x0114
-#define IRDMA_AE_AMP_MWBIND_OF_MR_STAG                                 0x0115
-#define IRDMA_AE_AMP_MWBIND_TO_ZERO_BASED_STAG                         0x0116
-#define IRDMA_AE_AMP_MWBIND_TO_MW_STAG                                 0x0117
-#define IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS                             0x0118
-#define IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS                             0x0119
-#define IRDMA_AE_AMP_MWBIND_TO_INVALID_PARENT                          0x011a
-#define IRDMA_AE_AMP_MWBIND_BIND_DISABLED                              0x011b
-#define IRDMA_AE_PRIV_OPERATION_DENIED                                 0x011c
-#define IRDMA_AE_AMP_INVALIDATE_TYPE1_MW                               0x011d
-#define IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW                                0x011e
-#define IRDMA_AE_AMP_FASTREG_INVALID_PBL_HPS_CFG                       0x011f
-#define IRDMA_AE_AMP_MWBIND_WRONG_TYPE                                 0x0120
-#define IRDMA_AE_AMP_FASTREG_PBLE_MISMATCH                             0x0121
-#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG                               0x0132
-#define IRDMA_AE_UDA_XMIT_BAD_PD                                       0x0133
-#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT                              0x0134
-#define IRDMA_AE_UDA_L4LEN_INVALID                                     0x0135
-#define IRDMA_AE_BAD_CLOSE                                             0x0201
-#define IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE                               0x0202
-#define IRDMA_AE_CQ_OPERATION_ERROR                                    0x0203
-#define IRDMA_AE_RDMA_READ_WHILE_ORD_ZERO                              0x0205
-#define IRDMA_AE_STAG_ZERO_INVALID                                     0x0206
-#define IRDMA_AE_IB_RREQ_AND_Q1_FULL                                   0x0207
-#define IRDMA_AE_IB_INVALID_REQUEST                                    0x0208
-#define IRDMA_AE_SRQ_LIMIT                                             0x0209
-#define IRDMA_AE_WQE_UNEXPECTED_OPCODE                                 0x020a
-#define IRDMA_AE_WQE_INVALID_PARAMETER                                 0x020b
-#define IRDMA_AE_WQE_INVALID_FRAG_DATA                                 0x020c
-#define IRDMA_AE_IB_REMOTE_ACCESS_ERROR                                        0x020d
-#define IRDMA_AE_IB_REMOTE_OP_ERROR                                    0x020e
-#define IRDMA_AE_SRQ_CATASTROPHIC_ERROR                                        0x020f
-#define IRDMA_AE_WQE_LSMM_TOO_LONG                                     0x0220
-#define IRDMA_AE_ATOMIC_ALIGNMENT                                      0x0221
-#define IRDMA_AE_ATOMIC_MASK                                           0x0222
-#define IRDMA_AE_INVALID_REQUEST                                       0x0223
-#define IRDMA_AE_PCIE_ATOMIC_DISABLE                                   0x0224
-#define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN                            0x0301
-#define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER     0x0303
-#define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION                           0x0304
-#define IRDMA_AE_DDP_UBE_INVALID_MO                                    0x0305
-#define IRDMA_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE               0x0306
-#define IRDMA_AE_DDP_UBE_INVALID_QN                                    0x0307
-#define IRDMA_AE_DDP_NO_L_BIT                                          0x0308
-#define IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION                       0x0311
-#define IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE                           0x0312
-#define IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST                         0x0313
-#define IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP                   0x0314
-#define IRDMA_AE_ROCE_RSP_LENGTH_ERROR                                 0x0316
-#define IRDMA_AE_ROCE_EMPTY_MCG                                                0x0380
-#define IRDMA_AE_ROCE_BAD_MC_IP_ADDR                                   0x0381
-#define IRDMA_AE_ROCE_BAD_MC_QPID                                      0x0382
-#define IRDMA_AE_MCG_QP_PROTOCOL_MISMATCH                              0x0383
-#define IRDMA_AE_INVALID_ARP_ENTRY                                     0x0401
-#define IRDMA_AE_INVALID_TCP_OPTION_RCVD                               0x0402
-#define IRDMA_AE_STALE_ARP_ENTRY                                       0x0403
-#define IRDMA_AE_INVALID_AH_ENTRY                                      0x0406
-#define IRDMA_AE_LLP_CLOSE_COMPLETE                                    0x0501
-#define IRDMA_AE_LLP_CONNECTION_RESET                                  0x0502
-#define IRDMA_AE_LLP_FIN_RECEIVED                                      0x0503
-#define IRDMA_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH      0x0504
-#define IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR                            0x0505
-#define IRDMA_AE_LLP_SEGMENT_TOO_SMALL                                 0x0507
-#define IRDMA_AE_LLP_SYN_RECEIVED                                      0x0508
-#define IRDMA_AE_LLP_TERMINATE_RECEIVED                                        0x0509
-#define IRDMA_AE_LLP_TOO_MANY_RETRIES                                  0x050a
-#define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES                                0x050b
-#define IRDMA_AE_LLP_DOUBT_REACHABILITY                                        0x050c
-#define IRDMA_AE_LLP_CONNECTION_ESTABLISHED                            0x050e
-#define IRDMA_AE_LLP_TOO_MANY_RNRS                                     0x050f
-#define IRDMA_AE_RESOURCE_EXHAUSTION                                   0x0520
-#define IRDMA_AE_RESET_SENT                                            0x0601
-#define IRDMA_AE_TERMINATE_SENT                                                0x0602
-#define IRDMA_AE_RESET_NOT_SENT                                                0x0603
-#define IRDMA_AE_LCE_QP_CATASTROPHIC                                   0x0700
-#define IRDMA_AE_LCE_FUNCTION_CATASTROPHIC                             0x0701
-#define IRDMA_AE_LCE_CQ_CATASTROPHIC                                   0x0702
-#define IRDMA_AE_REMOTE_QP_CATASTROPHIC                                        0x0703
-#define IRDMA_AE_LOCAL_QP_CATASTROPHIC                                 0x0704
-#define IRDMA_AE_RCE_QP_CATASTROPHIC                                   0x0705
-#define IRDMA_AE_QP_SUSPEND_COMPLETE                                   0x0900
-#define IRDMA_AE_CQP_DEFERRED_COMPLETE                                 0x0901
-#define IRDMA_AE_ADAPTER_CATASTROPHIC                                  0x0B0B
-
 #define FLD_LS_64(dev, val, field)     \
        (((u64)(val) << (dev)->hw_shifts[field ## _S]) & (dev)->hw_masks[field ## _M])
 #define FLD_RS_64(dev, val, field)     \
@@ -771,6 +670,10 @@ enum irdma_cqp_op_type {
 #define IRDMA_CQPSQ_FWQE_USERFLCODE BIT_ULL(60)
 #define IRDMA_CQPSQ_FWQE_FLUSHSQ BIT_ULL(61)
 #define IRDMA_CQPSQ_FWQE_FLUSHRQ BIT_ULL(62)
+#define IRDMA_CQPSQ_FWQE_ERR_SQ_IDX_VALID BIT_ULL(42)
+#define IRDMA_CQPSQ_FWQE_ERR_SQ_IDX GENMASK_ULL(49, 32)
+#define IRDMA_CQPSQ_FWQE_ERR_RQ_IDX_VALID BIT_ULL(43)
+#define IRDMA_CQPSQ_FWQE_ERR_RQ_IDX GENMASK_ULL(46, 32)
 #define IRDMA_CQPSQ_MAPT_PORT GENMASK_ULL(15, 0)
 #define IRDMA_CQPSQ_MAPT_ADDPORT BIT_ULL(62)
 #define IRDMA_CQPSQ_UPESD_SDCMD GENMASK_ULL(31, 0)
index 27b9623c2b09b043a723eb31d8b9185c140d78f5..7bad0e38786a21c8a389fad33cf0e95aa5c79505 100644 (file)
@@ -135,76 +135,24 @@ static void irdma_process_ceq(struct irdma_pci_f *rf, struct irdma_ceq *ceq)
 static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
                                   struct irdma_aeqe_info *info)
 {
+       struct qp_err_code qp_err;
+
        qp->sq_flush_code = info->sq;
        qp->rq_flush_code = info->rq;
-       qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
-
-       switch (info->ae_id) {
-       case IRDMA_AE_AMP_BOUNDS_VIOLATION:
-       case IRDMA_AE_AMP_INVALID_STAG:
-       case IRDMA_AE_AMP_RIGHTS_VIOLATION:
-       case IRDMA_AE_AMP_UNALLOCATED_STAG:
-       case IRDMA_AE_AMP_BAD_PD:
-       case IRDMA_AE_AMP_BAD_QP:
-       case IRDMA_AE_AMP_BAD_STAG_KEY:
-       case IRDMA_AE_AMP_BAD_STAG_INDEX:
-       case IRDMA_AE_AMP_TO_WRAP:
-       case IRDMA_AE_PRIV_OPERATION_DENIED:
-               qp->flush_code = FLUSH_PROT_ERR;
-               qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
-               break;
-       case IRDMA_AE_UDA_XMIT_BAD_PD:
-       case IRDMA_AE_WQE_UNEXPECTED_OPCODE:
-               qp->flush_code = FLUSH_LOC_QP_OP_ERR;
-               qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
-               break;
-       case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
-       case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT:
-       case IRDMA_AE_UDA_L4LEN_INVALID:
-       case IRDMA_AE_DDP_UBE_INVALID_MO:
-       case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
-               qp->flush_code = FLUSH_LOC_LEN_ERR;
-               qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
-               break;
-       case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
-       case IRDMA_AE_IB_REMOTE_ACCESS_ERROR:
-               qp->flush_code = FLUSH_REM_ACCESS_ERR;
-               qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
-               break;
-       case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
-       case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
-       case IRDMA_AE_ROCE_RSP_LENGTH_ERROR:
-       case IRDMA_AE_IB_REMOTE_OP_ERROR:
-               qp->flush_code = FLUSH_REM_OP_ERR;
-               qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
-               break;
-       case IRDMA_AE_LCE_QP_CATASTROPHIC:
-               qp->flush_code = FLUSH_FATAL_ERR;
-               qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
-               break;
-       case IRDMA_AE_IB_RREQ_AND_Q1_FULL:
-               qp->flush_code = FLUSH_GENERAL_ERR;
-               break;
-       case IRDMA_AE_LLP_TOO_MANY_RETRIES:
-               qp->flush_code = FLUSH_RETRY_EXC_ERR;
-               qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
-               break;
-       case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS:
-       case IRDMA_AE_AMP_MWBIND_BIND_DISABLED:
-       case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS:
-       case IRDMA_AE_AMP_MWBIND_VALID_STAG:
-               qp->flush_code = FLUSH_MW_BIND_ERR;
-               qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
-               break;
-       case IRDMA_AE_IB_INVALID_REQUEST:
-               qp->flush_code = FLUSH_REM_INV_REQ_ERR;
-               qp->event_type = IRDMA_QP_EVENT_REQ_ERR;
-               break;
-       default:
-               qp->flush_code = FLUSH_GENERAL_ERR;
-               qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
-               break;
+       if (qp->qp_uk.uk_attrs->hw_rev >= IRDMA_GEN_3) {
+               if (info->sq) {
+                       qp->err_sq_idx_valid = true;
+                       qp->err_sq_idx = info->wqe_idx;
+               }
+               if (info->rq) {
+                       qp->err_rq_idx_valid = true;
+                       qp->err_rq_idx = info->wqe_idx;
+               }
        }
+
+       qp_err = irdma_ae_to_qp_err_code(info->ae_id);
+       qp->flush_code = qp_err.flush_code;
+       qp->event_type = qp_err.event_type;
 }
 
 /**
@@ -320,7 +268,6 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
                        if (info->ae_id != IRDMA_AE_QP_SUSPEND_COMPLETE)
                                iwqp->last_aeq = info->ae_id;
                        spin_unlock_irqrestore(&iwqp->lock, flags);
-                       ctx_info = &iwqp->ctx_info;
                } else if (info->srq) {
                        if (info->ae_id != IRDMA_AE_SRQ_LIMIT)
                                continue;
@@ -466,9 +413,11 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
                default:
                        ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d, ae_src=%d\n",
                                  info->ae_id, info->qp, info->qp_cq_id, info->ae_src);
-                       if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
-                               ctx_info->roce_info->err_rq_idx_valid = info->rq;
-                               if (info->rq) {
+                       ctx_info = &iwqp->ctx_info;
+                       if (rdma_protocol_roce(&iwqp->iwdev->ibdev, 1)) {
+                               ctx_info->roce_info->err_rq_idx_valid =
+                                       ctx_info->srq_valid ? false : info->err_rq_idx_valid;
+                               if (ctx_info->roce_info->err_rq_idx_valid) {
                                        ctx_info->roce_info->err_rq_idx = info->wqe_idx;
                                        irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va,
                                                                ctx_info);
@@ -2832,7 +2781,9 @@ void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask)
        struct irdma_pci_f *rf = iwqp->iwdev->rf;
        u8 flush_code = iwqp->sc_qp.flush_code;
 
-       if (!(flush_mask & IRDMA_FLUSH_SQ) && !(flush_mask & IRDMA_FLUSH_RQ))
+       if ((!(flush_mask & IRDMA_FLUSH_SQ) &&
+            !(flush_mask & IRDMA_FLUSH_RQ)) ||
+           ((flush_mask & IRDMA_REFLUSH) && rf->rdma_ver >= IRDMA_GEN_3))
                return;
 
        /* Set flush info fields*/
@@ -2845,6 +2796,10 @@ void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask)
        info.rq_major_code = IRDMA_FLUSH_MAJOR_ERR;
        info.rq_minor_code = FLUSH_GENERAL_ERR;
        info.userflushcode = true;
+       info.err_sq_idx_valid = iwqp->sc_qp.err_sq_idx_valid;
+       info.err_sq_idx = iwqp->sc_qp.err_sq_idx;
+       info.err_rq_idx_valid = iwqp->sc_qp.err_rq_idx_valid;
+       info.err_rq_idx = iwqp->sc_qp.err_rq_idx;
 
        if (flush_mask & IRDMA_REFLUSH) {
                if (info.sq)
index c11b901ff1198c25206e704cc24360270ed5c6f2..4ae77cdde9dc79be657a63651a3076961103eed8 100644 (file)
@@ -97,12 +97,6 @@ enum irdma_term_mpa_errors {
        MPA_REQ_RSP = 0x04,
 };
 
-enum irdma_qp_event_type {
-       IRDMA_QP_EVENT_CATASTROPHIC,
-       IRDMA_QP_EVENT_ACCESS_ERR,
-       IRDMA_QP_EVENT_REQ_ERR,
-};
-
 enum irdma_hw_stats_index {
        /* gen1 - 32-bit */
        IRDMA_HW_STAT_INDEX_IP4RXDISCARD        = 0,
@@ -565,6 +559,10 @@ struct irdma_sc_qp {
        bool virtual_map:1;
        bool flush_sq:1;
        bool flush_rq:1;
+       bool err_sq_idx_valid:1;
+       bool err_rq_idx_valid:1;
+       u32 err_sq_idx;
+       u32 err_rq_idx;
        bool sq_flush_code:1;
        bool rq_flush_code:1;
        u32 pkt_limit;
@@ -1289,6 +1287,8 @@ struct irdma_cqp_manage_push_page_info {
 };
 
 struct irdma_qp_flush_info {
+       u32 err_sq_idx;
+       u32 err_rq_idx;
        u16 sq_minor_code;
        u16 sq_major_code;
        u16 rq_minor_code;
@@ -1299,6 +1299,8 @@ struct irdma_qp_flush_info {
        bool rq:1;
        bool userflushcode:1;
        bool generate_ae:1;
+       bool err_sq_idx_valid:1;
+       bool err_rq_idx_valid:1;
 };
 
 struct irdma_gen_ae_info {
index fb944c49f864472c8e49a8e513651f7f07835014..ce1ae10c30fcadee179accf3717e5924ae9f0334 100644 (file)
@@ -1148,6 +1148,7 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
        __le64 *cqe;
        struct irdma_qp_uk *qp;
        struct irdma_srq_uk *srq;
+       struct qp_err_code qp_err;
        u8 is_srq;
        struct irdma_ring *pring = NULL;
        u32 wqe_idx;
@@ -1233,16 +1234,35 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
        if (info->error) {
                info->major_err = FIELD_GET(IRDMA_CQ_MAJERR, qword3);
                info->minor_err = FIELD_GET(IRDMA_CQ_MINERR, qword3);
-               if (info->major_err == IRDMA_FLUSH_MAJOR_ERR) {
-                       info->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
+               switch (info->major_err) {
+               case IRDMA_SRQFLUSH_RSVD_MAJOR_ERR:
+                       qp_err = irdma_ae_to_qp_err_code(info->minor_err);
+                       info->minor_err = qp_err.flush_code;
+                       fallthrough;
+               case IRDMA_FLUSH_MAJOR_ERR:
                        /* Set the min error to standard flush error code for remaining cqes */
                        if (info->minor_err != FLUSH_GENERAL_ERR) {
                                qword3 &= ~IRDMA_CQ_MINERR;
                                qword3 |= FIELD_PREP(IRDMA_CQ_MINERR, FLUSH_GENERAL_ERR);
                                set_64bit_val(cqe, 24, qword3);
                        }
-               } else {
-                       info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN;
+                       info->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
+                       break;
+               default:
+#define IRDMA_CIE_SIGNATURE 0xE
+#define IRDMA_CQMAJERR_HIGH_NIBBLE GENMASK(15, 12)
+                       if (info->q_type == IRDMA_CQE_QTYPE_SQ &&
+                           qp->qp_type == IRDMA_QP_TYPE_ROCE_UD &&
+                           FIELD_GET(IRDMA_CQMAJERR_HIGH_NIBBLE, info->major_err)
+                           == IRDMA_CIE_SIGNATURE) {
+                               info->error = 0;
+                               info->major_err = 0;
+                               info->minor_err = 0;
+                               info->comp_status = IRDMA_COMPL_STATUS_SUCCESS;
+                       } else {
+                               info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN;
+                       }
+                       break;
                }
        } else {
                info->comp_status = IRDMA_COMPL_STATUS_SUCCESS;
@@ -1251,7 +1271,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
        get_64bit_val(cqe, 0, &qword0);
        get_64bit_val(cqe, 16, &qword2);
 
-       info->tcp_seq_num_rtt = (u32)FIELD_GET(IRDMACQ_TCPSEQNUMRTT, qword0);
        info->qp_id = (u32)FIELD_GET(IRDMACQ_QPID, qword2);
        info->ud_src_qpn = (u32)FIELD_GET(IRDMACQ_UDSRCQPN, qword2);
 
@@ -1377,9 +1396,15 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
        ret_code = 0;
 
 exit:
-       if (!ret_code && info->comp_status == IRDMA_COMPL_STATUS_FLUSHED)
+       if (!ret_code && info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) {
                if (pring && IRDMA_RING_MORE_WORK(*pring))
-                       move_cq_head = false;
+               /* Park CQ head during a flush to generate additional CQEs
+                * from SW for all unprocessed WQEs. For GEN3 and beyond
+                * FW will generate/flush these CQEs so move to the next CQE
+                */
+                       move_cq_head = qp->uk_attrs->hw_rev <= IRDMA_GEN_2 ?
+                                               false : true;
+       }
 
        if (move_cq_head) {
                IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
index ed7ce98e887bf472a4abfb0159770c82d6e5d13d..ab57f689827a024fbbde40b3487809baa60a8b8f 100644 (file)
 #define IRDMA_OP_TYPE_REC      0x3e
 #define IRDMA_OP_TYPE_REC_IMM  0x3f
 
-#define IRDMA_FLUSH_MAJOR_ERR  1
+#define IRDMA_FLUSH_MAJOR_ERR 1
+#define IRDMA_SRQFLUSH_RSVD_MAJOR_ERR 0xfffe
+
+/* Async Events codes */
+#define IRDMA_AE_AMP_UNALLOCATED_STAG                                  0x0102
+#define IRDMA_AE_AMP_INVALID_STAG                                      0x0103
+#define IRDMA_AE_AMP_BAD_QP                                            0x0104
+#define IRDMA_AE_AMP_BAD_PD                                            0x0105
+#define IRDMA_AE_AMP_BAD_STAG_KEY                                      0x0106
+#define IRDMA_AE_AMP_BAD_STAG_INDEX                                    0x0107
+#define IRDMA_AE_AMP_BOUNDS_VIOLATION                                  0x0108
+#define IRDMA_AE_AMP_RIGHTS_VIOLATION                                  0x0109
+#define IRDMA_AE_AMP_TO_WRAP                                           0x010a
+#define IRDMA_AE_AMP_FASTREG_VALID_STAG                                        0x010c
+#define IRDMA_AE_AMP_FASTREG_MW_STAG                                   0x010d
+#define IRDMA_AE_AMP_FASTREG_INVALID_RIGHTS                            0x010e
+#define IRDMA_AE_AMP_FASTREG_INVALID_LENGTH                            0x0110
+#define IRDMA_AE_AMP_INVALIDATE_SHARED                                 0x0111
+#define IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS                        0x0112
+#define IRDMA_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS                  0x0113
+#define IRDMA_AE_AMP_MWBIND_VALID_STAG                                 0x0114
+#define IRDMA_AE_AMP_MWBIND_OF_MR_STAG                                 0x0115
+#define IRDMA_AE_AMP_MWBIND_TO_ZERO_BASED_STAG                         0x0116
+#define IRDMA_AE_AMP_MWBIND_TO_MW_STAG                                 0x0117
+#define IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS                             0x0118
+#define IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS                             0x0119
+#define IRDMA_AE_AMP_MWBIND_TO_INVALID_PARENT                          0x011a
+#define IRDMA_AE_AMP_MWBIND_BIND_DISABLED                              0x011b
+#define IRDMA_AE_PRIV_OPERATION_DENIED                                 0x011c
+#define IRDMA_AE_AMP_INVALIDATE_TYPE1_MW                               0x011d
+#define IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW                                0x011e
+#define IRDMA_AE_AMP_FASTREG_INVALID_PBL_HPS_CFG                       0x011f
+#define IRDMA_AE_AMP_MWBIND_WRONG_TYPE                                 0x0120
+#define IRDMA_AE_AMP_FASTREG_PBLE_MISMATCH                             0x0121
+#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG                               0x0132
+#define IRDMA_AE_UDA_XMIT_BAD_PD                                       0x0133
+#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT                              0x0134
+#define IRDMA_AE_UDA_L4LEN_INVALID                                     0x0135
+#define IRDMA_AE_BAD_CLOSE                                             0x0201
+#define IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE                               0x0202
+#define IRDMA_AE_CQ_OPERATION_ERROR                                    0x0203
+#define IRDMA_AE_RDMA_READ_WHILE_ORD_ZERO                              0x0205
+#define IRDMA_AE_STAG_ZERO_INVALID                                     0x0206
+#define IRDMA_AE_IB_RREQ_AND_Q1_FULL                                   0x0207
+#define IRDMA_AE_IB_INVALID_REQUEST                                    0x0208
+#define IRDMA_AE_SRQ_LIMIT                                             0x0209
+#define IRDMA_AE_WQE_UNEXPECTED_OPCODE                                 0x020a
+#define IRDMA_AE_WQE_INVALID_PARAMETER                                 0x020b
+#define IRDMA_AE_WQE_INVALID_FRAG_DATA                                 0x020c
+#define IRDMA_AE_IB_REMOTE_ACCESS_ERROR                                        0x020d
+#define IRDMA_AE_IB_REMOTE_OP_ERROR                                    0x020e
+#define IRDMA_AE_SRQ_CATASTROPHIC_ERROR                                        0x020f
+#define IRDMA_AE_WQE_LSMM_TOO_LONG                                     0x0220
+#define IRDMA_AE_ATOMIC_ALIGNMENT                                      0x0221
+#define IRDMA_AE_ATOMIC_MASK                                           0x0222
+#define IRDMA_AE_INVALID_REQUEST                                       0x0223
+#define IRDMA_AE_PCIE_ATOMIC_DISABLE                                   0x0224
+#define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN                            0x0301
+#define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER     0x0303
+#define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION                           0x0304
+#define IRDMA_AE_DDP_UBE_INVALID_MO                                    0x0305
+#define IRDMA_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE               0x0306
+#define IRDMA_AE_DDP_UBE_INVALID_QN                                    0x0307
+#define IRDMA_AE_DDP_NO_L_BIT                                          0x0308
+#define IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION                       0x0311
+#define IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE                           0x0312
+#define IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST                         0x0313
+#define IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP                   0x0314
+#define IRDMA_AE_ROCE_RSP_LENGTH_ERROR                                 0x0316
+#define IRDMA_AE_ROCE_EMPTY_MCG                                                0x0380
+#define IRDMA_AE_ROCE_BAD_MC_IP_ADDR                                   0x0381
+#define IRDMA_AE_ROCE_BAD_MC_QPID                                      0x0382
+#define IRDMA_AE_MCG_QP_PROTOCOL_MISMATCH                              0x0383
+#define IRDMA_AE_INVALID_ARP_ENTRY                                     0x0401
+#define IRDMA_AE_INVALID_TCP_OPTION_RCVD                               0x0402
+#define IRDMA_AE_STALE_ARP_ENTRY                                       0x0403
+#define IRDMA_AE_INVALID_AH_ENTRY                                      0x0406
+#define IRDMA_AE_LLP_CLOSE_COMPLETE                                    0x0501
+#define IRDMA_AE_LLP_CONNECTION_RESET                                  0x0502
+#define IRDMA_AE_LLP_FIN_RECEIVED                                      0x0503
+#define IRDMA_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH      0x0504
+#define IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR                            0x0505
+#define IRDMA_AE_LLP_SEGMENT_TOO_SMALL                                 0x0507
+#define IRDMA_AE_LLP_SYN_RECEIVED                                      0x0508
+#define IRDMA_AE_LLP_TERMINATE_RECEIVED                                        0x0509
+#define IRDMA_AE_LLP_TOO_MANY_RETRIES                                  0x050a
+#define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES                                0x050b
+#define IRDMA_AE_LLP_DOUBT_REACHABILITY                                        0x050c
+#define IRDMA_AE_LLP_CONNECTION_ESTABLISHED                            0x050e
+#define IRDMA_AE_LLP_TOO_MANY_RNRS                                     0x050f
+#define IRDMA_AE_RESOURCE_EXHAUSTION                                   0x0520
+#define IRDMA_AE_RESET_SENT                                            0x0601
+#define IRDMA_AE_TERMINATE_SENT                                                0x0602
+#define IRDMA_AE_RESET_NOT_SENT                                                0x0603
+#define IRDMA_AE_LCE_QP_CATASTROPHIC                                   0x0700
+#define IRDMA_AE_LCE_FUNCTION_CATASTROPHIC                             0x0701
+#define IRDMA_AE_LCE_CQ_CATASTROPHIC                                   0x0702
+#define IRDMA_AE_REMOTE_QP_CATASTROPHIC                                        0x0703
+#define IRDMA_AE_LOCAL_QP_CATASTROPHIC                                 0x0704
+#define IRDMA_AE_RCE_QP_CATASTROPHIC                                   0x0705
+#define IRDMA_AE_QP_SUSPEND_COMPLETE                                   0x0900
+#define IRDMA_AE_CQP_DEFERRED_COMPLETE                                 0x0901
+#define IRDMA_AE_ADAPTER_CATASTROPHIC                                  0x0B0B
 
 enum irdma_device_caps_const {
        IRDMA_WQE_SIZE =                        4,
@@ -109,6 +211,13 @@ enum irdma_flush_opcode {
        FLUSH_RETRY_EXC_ERR,
        FLUSH_MW_BIND_ERR,
        FLUSH_REM_INV_REQ_ERR,
+       FLUSH_RNR_RETRY_EXC_ERR,
+};
+
+enum irdma_qp_event_type {
+       IRDMA_QP_EVENT_CATASTROPHIC,
+       IRDMA_QP_EVENT_ACCESS_ERR,
+       IRDMA_QP_EVENT_REQ_ERR,
 };
 
 enum irdma_cmpl_status {
@@ -282,6 +391,11 @@ struct irdma_cq_poll_info {
        bool imm_valid:1;
 };
 
+struct qp_err_code { /* value returned by irdma_ae_to_qp_err_code() */
+       enum irdma_flush_opcode flush_code; /* flush opcode chosen for the AE */
+       enum irdma_qp_event_type event_type; /* QP event class chosen for the AE */
+};
+
 int irdma_uk_atomic_compare_swap(struct irdma_qp_uk *qp,
                                 struct irdma_post_sq_info *info, bool post_sq);
 int irdma_uk_atomic_fetch_add(struct irdma_qp_uk *qp,
@@ -479,4 +593,82 @@ int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift,
 int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, u32 srq_size, u8 shift,
                       u32 *srqdepth);
 void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx);
+/* irdma_ae_to_qp_err_code - map an AE id to a flush opcode and QP event type */
+static inline struct qp_err_code irdma_ae_to_qp_err_code(u16 ae_id)
+{
+       struct qp_err_code qp_err = {}; /* every case below sets both fields */
+
+       switch (ae_id) { /* case labels sharing a body get the same result */
+       case IRDMA_AE_AMP_BOUNDS_VIOLATION:
+       case IRDMA_AE_AMP_INVALID_STAG:
+       case IRDMA_AE_AMP_RIGHTS_VIOLATION:
+       case IRDMA_AE_AMP_UNALLOCATED_STAG:
+       case IRDMA_AE_AMP_BAD_PD:
+       case IRDMA_AE_AMP_BAD_QP:
+       case IRDMA_AE_AMP_BAD_STAG_KEY:
+       case IRDMA_AE_AMP_BAD_STAG_INDEX:
+       case IRDMA_AE_AMP_TO_WRAP:
+       case IRDMA_AE_PRIV_OPERATION_DENIED:
+               qp_err.flush_code = FLUSH_PROT_ERR;
+               qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+               break;
+       case IRDMA_AE_UDA_XMIT_BAD_PD: /* not grouped with IRDMA_AE_AMP_BAD_PD */
+       case IRDMA_AE_WQE_UNEXPECTED_OPCODE:
+               qp_err.flush_code = FLUSH_LOC_QP_OP_ERR;
+               qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+               break;
+       case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT:
+       case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
+       case IRDMA_AE_UDA_L4LEN_INVALID:
+       case IRDMA_AE_DDP_UBE_INVALID_MO:
+       case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+               qp_err.flush_code = FLUSH_LOC_LEN_ERR;
+               qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+               break;
+       case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
+       case IRDMA_AE_IB_REMOTE_ACCESS_ERROR:
+               qp_err.flush_code = FLUSH_REM_ACCESS_ERR;
+               qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+               break;
+       case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS:
+       case IRDMA_AE_AMP_MWBIND_BIND_DISABLED:
+       case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS:
+       case IRDMA_AE_AMP_MWBIND_VALID_STAG:
+               qp_err.flush_code = FLUSH_MW_BIND_ERR;
+               qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+               break;
+       case IRDMA_AE_LLP_TOO_MANY_RETRIES:
+               qp_err.flush_code = FLUSH_RETRY_EXC_ERR;
+               qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+               break;
+       case IRDMA_AE_IB_INVALID_REQUEST: /* only AE id yielding REQ_ERR */
+               qp_err.flush_code = FLUSH_REM_INV_REQ_ERR;
+               qp_err.event_type = IRDMA_QP_EVENT_REQ_ERR;
+               break;
+       case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
+       case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+       case IRDMA_AE_ROCE_RSP_LENGTH_ERROR:
+       case IRDMA_AE_IB_REMOTE_OP_ERROR:
+               qp_err.flush_code = FLUSH_REM_OP_ERR;
+               qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+               break;
+       case IRDMA_AE_LLP_TOO_MANY_RNRS:
+               qp_err.flush_code = FLUSH_RNR_RETRY_EXC_ERR;
+               qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+               break;
+       case IRDMA_AE_LCE_QP_CATASTROPHIC:
+       case IRDMA_AE_REMOTE_QP_CATASTROPHIC:
+       case IRDMA_AE_LOCAL_QP_CATASTROPHIC:
+       case IRDMA_AE_RCE_QP_CATASTROPHIC:
+               qp_err.flush_code = FLUSH_FATAL_ERR;
+               qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+               break;
+       default: /* any ae_id not listed above */
+               qp_err.flush_code = FLUSH_GENERAL_ERR;
+               qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+               break;
+       }
+
+       return qp_err;
+}
 #endif /* IRDMA_USER_H */
index 167b5bdc668e77e5354fff7a9a07e7e9c59e7a08..24f9503f410f8c83e2e9dba7196eb129617c4bd7 100644 (file)
@@ -542,7 +542,7 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 
        iwqp->sc_qp.qp_uk.destroy_pending = true;
 
-       if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS)
+       if (iwqp->iwarp_state >= IRDMA_QP_STATE_IDLE)
                irdma_modify_qp_to_err(&iwqp->sc_qp);
 
        if (!iwqp->user_mode)
@@ -4132,6 +4132,7 @@ static int irdma_post_send(struct ib_qp *ibqp,
                mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
                                 msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
        }
+
        if (err)
                *bad_wr = ib_wr;
 
@@ -4255,6 +4256,8 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode
                return IB_WC_MW_BIND_ERR;
        case FLUSH_REM_INV_REQ_ERR:
                return IB_WC_REM_INV_REQ_ERR;
+       case FLUSH_RNR_RETRY_EXC_ERR:
+               return IB_WC_RNR_RETRY_EXC_ERR;
        case FLUSH_FATAL_ERR:
        default:
                return IB_WC_FATAL_ERR;