Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
author     Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 8 Dec 2023 20:27:11 +0000 (12:27 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 8 Dec 2023 20:27:11 +0000 (12:27 -0800)
Pull rdma fixes from Jason Gunthorpe:
 "Primarily rtrs and irdma fixes:

   - Fix uninitialized value in ib_get_eth_speed()

   - Fix hns refusing to work if userspace doesn't select the correct
     congestion control algorithm

   - Several irdma fixes - unreliable Send Queue Drain, use after free,
     64k page size bugs, device removal races

   - Several rtrs bug fixes - crashes, memory leaks, use after free, bad
     credit accounting, bogus WARN_ON

   - Typos and a MAINTAINER update"
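
As a rough illustration of the ib_get_eth_speed() fix listed above, the heart of the change (see the drivers/infiniband/core/verbs.c hunk below) is zero-initializing the ethtool link settings before querying them. A minimal sketch follows; the helper name is made up, and the locking and fallback details are recalled from the upstream function rather than shown in this diff:

	/* Sketch only: query the netdev's link speed without reading
	 * uninitialized stack memory when the driver fills in nothing. */
	static u32 sketch_query_speed(struct net_device *netdev)
	{
		struct ethtool_link_ksettings lksettings = {};	/* zero-init: unfilled fields
								 * must not be read as garbage */
		int rc;

		rtnl_lock();
		rc = __ethtool_get_link_ksettings(netdev, &lksettings);
		rtnl_unlock();

		/* Only trust the reported speed when the query succeeded and is known. */
		if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN)
			return lksettings.base.speed;

		return SPEED_1000;	/* conservative fallback, as the core code does */
	}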

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/irdma: Avoid free the non-cqp_request scratch
  RDMA/irdma: Fix support for 64k pages
  RDMA/irdma: Ensure iWarp QP queue memory is OS paged aligned
  RDMA/core: Fix umem iterator when PAGE_SIZE is greater then HCA pgsz
  RDMA/irdma: Fix UAF in irdma_sc_ccq_get_cqe_info()
  RDMA/bnxt_re: Correct module description string
  RDMA/rtrs-clt: Remove the warnings for req in_use check
  RDMA/rtrs-clt: Fix the max_send_wr setting
  RDMA/rtrs-srv: Destroy path files after making sure no IOs in-flight
  RDMA/rtrs-srv: Free srv_mr iu only when always_invalidate is true
  RDMA/rtrs-srv: Check return values while processing info request
  RDMA/rtrs-clt: Start hb after path_up
  RDMA/rtrs-srv: Do not unconditionally enable irq
  MAINTAINERS: Add Chengchang Tang as Hisilicon RoCE maintainer
  RDMA/irdma: Add wait for suspend on SQD
  RDMA/irdma: Do not modify to SQD on error
  RDMA/hns: Fix unnecessary err return when using invalid congest control algorithm
  RDMA/core: Fix uninit-value access in ib_get_eth_speed()

14 files changed:
MAINTAINERS
drivers/infiniband/core/umem.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/bnxt_re/main.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/irdma/hw.c
drivers/infiniband/hw/irdma/main.c
drivers/infiniband/hw/irdma/main.h
drivers/infiniband/hw/irdma/verbs.c
drivers/infiniband/hw/irdma/verbs.h
drivers/infiniband/ulp/rtrs/rtrs-clt.c
drivers/infiniband/ulp/rtrs/rtrs-srv.c
include/rdma/ib_umem.h
include/rdma/ib_verbs.h

diff --git a/MAINTAINERS b/MAINTAINERS
index d33cdf3d3ca64a6bf59c66049fa13790ef5e3d9c..e2c6187a3ac80f18aca297c6fb73332708c105df 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9573,6 +9573,7 @@ F:        drivers/crypto/hisilicon/sgl.c
 F:     include/linux/hisi_acc_qm.h
 
 HISILICON ROCE DRIVER
+M:     Chengchang Tang <tangchengchang@huawei.com>
 M:     Junxian Huang <huangjunxian6@hisilicon.com>
 L:     linux-rdma@vger.kernel.org
 S:     Maintained
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index f9ab671c8eda556f6ac9aa72ae787392beb72edd..07c571c7b69992e21a6ff863b6ce7bfc2fc2355b 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -96,12 +96,6 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
                return page_size;
        }
 
-       /* rdma_for_each_block() has a bug if the page size is smaller than the
-        * page size used to build the umem. For now prevent smaller page sizes
-        * from being returned.
-        */
-       pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT);
-
        /* The best result is the smallest page size that results in the minimum
         * number of required pages. Compute the largest page size that could
         * work based on VA address bits that don't change.
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 8a6da87f464b0d70e0c0e1adf45ed9e1a7c85a78..94a7f3b0c71cc2778b08af5c0afa06f7efad815b 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1971,7 +1971,7 @@ int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
        int rc;
        u32 netdev_speed;
        struct net_device *netdev;
-       struct ethtool_link_ksettings lksettings;
+       struct ethtool_link_ksettings lksettings = {};
 
        if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
                return -EINVAL;
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index f79369c8360a5f615402287d2f8440469a24f3f6..a99c68247af0cc7d9e2274e20f40580bde90ed61 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -71,7 +71,7 @@ static char version[] =
                BNXT_RE_DESC "\n";
 
 MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
-MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
+MODULE_DESCRIPTION(BNXT_RE_DESC);
 MODULE_LICENSE("Dual BSD/GPL");
 
 /* globals */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 0cd2612a49870f1e831f5a6b088aff3260ce2cb6..2bca9560f32ddd02628eb655c1be261489c726c9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -4760,10 +4760,15 @@ static int check_cong_type(struct ib_qp *ibqp,
                cong_alg->wnd_mode_sel = WND_LIMIT;
                break;
        default:
-               ibdev_err(&hr_dev->ib_dev,
-                         "error type(%u) for congestion selection.\n",
-                         hr_dev->caps.cong_type);
-               return -EINVAL;
+               ibdev_warn(&hr_dev->ib_dev,
+                          "invalid type(%u) for congestion selection.\n",
+                          hr_dev->caps.cong_type);
+               hr_dev->caps.cong_type = CONG_TYPE_DCQCN;
+               cong_alg->alg_sel = CONG_DCQCN;
+               cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
+               cong_alg->dip_vld = DIP_INVALID;
+               cong_alg->wnd_mode_sel = WND_LIMIT;
+               break;
        }
 
        return 0;
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index 8fa7e4a18e737ae55243f0e160af0b2cbad2277c..bd4b2b89644442341226e6c5716f5ddb221ea1a1 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -321,7 +321,11 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
                        break;
                case IRDMA_AE_QP_SUSPEND_COMPLETE:
                        if (iwqp->iwdev->vsi.tc_change_pending) {
-                               atomic_dec(&iwqp->sc_qp.vsi->qp_suspend_reqs);
+                               if (!atomic_dec_return(&qp->vsi->qp_suspend_reqs))
+                                       wake_up(&iwqp->iwdev->suspend_wq);
+                       }
+                       if (iwqp->suspend_pending) {
+                               iwqp->suspend_pending = false;
                                wake_up(&iwqp->iwdev->suspend_wq);
                        }
                        break;
@@ -581,9 +585,6 @@ static void irdma_destroy_cqp(struct irdma_pci_f *rf)
        struct irdma_cqp *cqp = &rf->cqp;
        int status = 0;
 
-       if (rf->cqp_cmpl_wq)
-               destroy_workqueue(rf->cqp_cmpl_wq);
-
        status = irdma_sc_cqp_destroy(dev->cqp);
        if (status)
                ibdev_dbg(to_ibdev(dev), "ERR: Destroy CQP failed %d\n", status);
@@ -748,6 +749,9 @@ static void irdma_destroy_ccq(struct irdma_pci_f *rf)
        struct irdma_ccq *ccq = &rf->ccq;
        int status = 0;
 
+       if (rf->cqp_cmpl_wq)
+               destroy_workqueue(rf->cqp_cmpl_wq);
+
        if (!rf->reset)
                status = irdma_sc_ccq_destroy(dev->ccq, 0, true);
        if (status)
@@ -1180,7 +1184,6 @@ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
        int status;
        struct irdma_ceq_init_info info = {};
        struct irdma_sc_dev *dev = &rf->sc_dev;
-       u64 scratch;
        u32 ceq_size;
 
        info.ceq_id = ceq_id;
@@ -1201,14 +1204,13 @@ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
        iwceq->sc_ceq.ceq_id = ceq_id;
        info.dev = dev;
        info.vsi = vsi;
-       scratch = (uintptr_t)&rf->cqp.sc_cqp;
        status = irdma_sc_ceq_init(&iwceq->sc_ceq, &info);
        if (!status) {
                if (dev->ceq_valid)
                        status = irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq,
                                                   IRDMA_OP_CEQ_CREATE);
                else
-                       status = irdma_sc_cceq_create(&iwceq->sc_ceq, scratch);
+                       status = irdma_sc_cceq_create(&iwceq->sc_ceq, 0);
        }
 
        if (status) {
diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c
index 9ac48b4dab413d4f4eef6a51ebe7b3fd13e97a6c..3f13200ff71bc03a3f5e7817af12390eded1bc3b 100644
--- a/drivers/infiniband/hw/irdma/main.c
+++ b/drivers/infiniband/hw/irdma/main.c
@@ -48,7 +48,7 @@ static void irdma_prep_tc_change(struct irdma_device *iwdev)
        /* Wait for all qp's to suspend */
        wait_event_timeout(iwdev->suspend_wq,
                           !atomic_read(&iwdev->vsi.qp_suspend_reqs),
-                          IRDMA_EVENT_TIMEOUT);
+                          msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS));
        irdma_ws_reset(&iwdev->vsi);
 }
 
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
index d66d87bb8bc4d8935de8a2f7a498555b506e3ce0..b65bc2ea542f56b2e72bae1db491dd5969c620e2 100644
--- a/drivers/infiniband/hw/irdma/main.h
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -78,7 +78,7 @@ extern struct auxiliary_driver i40iw_auxiliary_drv;
 
 #define MAX_DPC_ITERATIONS     128
 
-#define IRDMA_EVENT_TIMEOUT            50000
+#define IRDMA_EVENT_TIMEOUT_MS         5000
 #define IRDMA_VCHNL_EVENT_TIMEOUT      100000
 #define IRDMA_RST_TIMEOUT_HZ           4
 
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 2138f0a2ff859ec20b55a46b87675d1d4fc30116..b5eb8d421988c1abd73cf4eb3a93adc6f2944089 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -1157,6 +1157,21 @@ exit:
        return prio;
 }
 
+static int irdma_wait_for_suspend(struct irdma_qp *iwqp)
+{
+       if (!wait_event_timeout(iwqp->iwdev->suspend_wq,
+                               !iwqp->suspend_pending,
+                               msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS))) {
+               iwqp->suspend_pending = false;
+               ibdev_warn(&iwqp->iwdev->ibdev,
+                          "modify_qp timed out waiting for suspend. qp_id = %d, last_ae = 0x%x\n",
+                          iwqp->ibqp.qp_num, iwqp->last_aeq);
+               return -EBUSY;
+       }
+
+       return 0;
+}
+
 /**
  * irdma_modify_qp_roce - modify qp request
  * @ibqp: qp's pointer for modify
@@ -1420,17 +1435,11 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
                        info.next_iwarp_state = IRDMA_QP_STATE_SQD;
                        issue_modify_qp = 1;
+                       iwqp->suspend_pending = true;
                        break;
                case IB_QPS_SQE:
                case IB_QPS_ERR:
                case IB_QPS_RESET:
-                       if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS) {
-                               spin_unlock_irqrestore(&iwqp->lock, flags);
-                               info.next_iwarp_state = IRDMA_QP_STATE_SQD;
-                               irdma_hw_modify_qp(iwdev, iwqp, &info, true);
-                               spin_lock_irqsave(&iwqp->lock, flags);
-                       }
-
                        if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) {
                                spin_unlock_irqrestore(&iwqp->lock, flags);
                                if (udata && udata->inlen) {
@@ -1467,6 +1476,11 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                        ctx_info->rem_endpoint_idx = udp_info->arp_idx;
                        if (irdma_hw_modify_qp(iwdev, iwqp, &info, true))
                                return -EINVAL;
+                       if (info.next_iwarp_state == IRDMA_QP_STATE_SQD) {
+                               ret = irdma_wait_for_suspend(iwqp);
+                               if (ret)
+                                       return ret;
+                       }
                        spin_lock_irqsave(&iwqp->lock, flags);
                        if (iwqp->iwarp_state == info.curr_iwarp_state) {
                                iwqp->iwarp_state = info.next_iwarp_state;
@@ -2900,7 +2914,7 @@ static struct irdma_mr *irdma_alloc_iwmr(struct ib_umem *region,
        iwmr->type = reg_type;
 
        pgsz_bitmap = (reg_type == IRDMA_MEMREG_TYPE_MEM) ?
-               iwdev->rf->sc_dev.hw_attrs.page_size_cap : PAGE_SIZE;
+               iwdev->rf->sc_dev.hw_attrs.page_size_cap : SZ_4K;
 
        iwmr->page_size = ib_umem_find_best_pgsz(region, pgsz_bitmap, virt);
        if (unlikely(!iwmr->page_size)) {
@@ -2932,6 +2946,11 @@ static int irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req,
        int err;
        u8 lvl;
 
+       /* iWarp: Catch page not starting on OS page boundary */
+       if (!rdma_protocol_roce(&iwdev->ibdev, 1) &&
+           ib_umem_offset(iwmr->region))
+               return -EINVAL;
+
        total = req.sq_pages + req.rq_pages + 1;
        if (total > iwmr->page_cnt)
                return -EINVAL;
diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h
index c42ac22de00e9372f7db62fdc5b8b691fac99c45..cfa140b36395ae9f49a9b928baa4d5a1a0aaf336 100644
--- a/drivers/infiniband/hw/irdma/verbs.h
+++ b/drivers/infiniband/hw/irdma/verbs.h
@@ -198,6 +198,7 @@ struct irdma_qp {
        u8 flush_issued : 1;
        u8 sig_all : 1;
        u8 pau_mode : 1;
+       u8 suspend_pending : 1;
        u8 rsvd : 1;
        u8 iwarp_state;
        u16 term_sq_flush_code;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 07261523c554735fd1ef3cf950a0e5a399b9cbaa..7f3167ce2972246447f7009110279313e8ca9194 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -384,7 +384,7 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
        struct rtrs_clt_path *clt_path;
        int err;
 
-       if (WARN_ON(!req->in_use))
+       if (!req->in_use)
                return;
        if (WARN_ON(!req->con))
                return;
@@ -1699,7 +1699,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
                clt_path->s.dev_ref++;
                max_send_wr = min_t(int, wr_limit,
                              /* QD * (REQ + RSP + FR REGS or INVS) + drain */
-                             clt_path->queue_depth * 3 + 1);
+                             clt_path->queue_depth * 4 + 1);
                max_recv_wr = min_t(int, wr_limit,
                              clt_path->queue_depth * 3 + 1);
                max_send_sge = 2;
@@ -2350,8 +2350,6 @@ static int init_conns(struct rtrs_clt_path *clt_path)
        if (err)
                goto destroy;
 
-       rtrs_start_hb(&clt_path->s);
-
        return 0;
 
 destroy:
@@ -2625,6 +2623,7 @@ static int init_path(struct rtrs_clt_path *clt_path)
                goto out;
        }
        rtrs_clt_path_up(clt_path);
+       rtrs_start_hb(&clt_path->s);
 out:
        mutex_unlock(&clt_path->init_mutex);
 
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index 75e56604e4628622915340087665bc86e7db83eb..1d33efb8fb03be74be953c280a9df61fdbba6ac4 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -65,8 +65,9 @@ static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path,
 {
        enum rtrs_srv_state old_state;
        bool changed = false;
+       unsigned long flags;
 
-       spin_lock_irq(&srv_path->state_lock);
+       spin_lock_irqsave(&srv_path->state_lock, flags);
        old_state = srv_path->state;
        switch (new_state) {
        case RTRS_SRV_CONNECTED:
@@ -87,7 +88,7 @@ static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path,
        }
        if (changed)
                srv_path->state = new_state;
-       spin_unlock_irq(&srv_path->state_lock);
+       spin_unlock_irqrestore(&srv_path->state_lock, flags);
 
        return changed;
 }
@@ -550,7 +551,10 @@ static void unmap_cont_bufs(struct rtrs_srv_path *srv_path)
                struct rtrs_srv_mr *srv_mr;
 
                srv_mr = &srv_path->mrs[i];
-               rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1);
+
+               if (always_invalidate)
+                       rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1);
+
                ib_dereg_mr(srv_mr->mr);
                ib_dma_unmap_sg(srv_path->s.dev->ib_dev, srv_mr->sgt.sgl,
                                srv_mr->sgt.nents, DMA_BIDIRECTIONAL);
@@ -709,20 +713,23 @@ static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
        WARN_ON(wc->opcode != IB_WC_SEND);
 }
 
-static void rtrs_srv_path_up(struct rtrs_srv_path *srv_path)
+static int rtrs_srv_path_up(struct rtrs_srv_path *srv_path)
 {
        struct rtrs_srv_sess *srv = srv_path->srv;
        struct rtrs_srv_ctx *ctx = srv->ctx;
-       int up;
+       int up, ret = 0;
 
        mutex_lock(&srv->paths_ev_mutex);
        up = ++srv->paths_up;
        if (up == 1)
-               ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL);
+               ret = ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL);
        mutex_unlock(&srv->paths_ev_mutex);
 
        /* Mark session as established */
-       srv_path->established = true;
+       if (!ret)
+               srv_path->established = true;
+
+       return ret;
 }
 
 static void rtrs_srv_path_down(struct rtrs_srv_path *srv_path)
@@ -851,7 +858,12 @@ static int process_info_req(struct rtrs_srv_con *con,
                goto iu_free;
        kobject_get(&srv_path->kobj);
        get_device(&srv_path->srv->dev);
-       rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED);
+       err = rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED);
+       if (!err) {
+               rtrs_err(s, "rtrs_srv_change_state(), err: %d\n", err);
+               goto iu_free;
+       }
+
        rtrs_srv_start_hb(srv_path);
 
        /*
@@ -860,7 +872,11 @@ static int process_info_req(struct rtrs_srv_con *con,
         * all connections are successfully established.  Thus, simply notify
         * listener with a proper event if we are the first path.
         */
-       rtrs_srv_path_up(srv_path);
+       err = rtrs_srv_path_up(srv_path);
+       if (err) {
+               rtrs_err(s, "rtrs_srv_path_up(), err: %d\n", err);
+               goto iu_free;
+       }
 
        ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev,
                                      tx_iu->dma_addr,
@@ -1516,7 +1532,6 @@ static void rtrs_srv_close_work(struct work_struct *work)
 
        srv_path = container_of(work, typeof(*srv_path), close_work);
 
-       rtrs_srv_destroy_path_files(srv_path);
        rtrs_srv_stop_hb(srv_path);
 
        for (i = 0; i < srv_path->s.con_num; i++) {
@@ -1536,6 +1551,8 @@ static void rtrs_srv_close_work(struct work_struct *work)
        /* Wait for all completion */
        wait_for_completion(&srv_path->complete_done);
 
+       rtrs_srv_destroy_path_files(srv_path);
+
        /* Notify upper layer if we are the last path */
        rtrs_srv_path_down(srv_path);
 
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 95896472a82bfb8d9a5d4ebbcd6ebd6f5d445ca6..565a850445414d4de6fae1de3d0c96ea80b8f3ab 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -77,6 +77,13 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
 {
        __rdma_block_iter_start(biter, umem->sgt_append.sgt.sgl,
                                umem->sgt_append.sgt.nents, pgsz);
+       biter->__sg_advance = ib_umem_offset(umem) & ~(pgsz - 1);
+       biter->__sg_numblocks = ib_umem_num_dma_blocks(umem, pgsz);
+}
+
+static inline bool __rdma_umem_block_iter_next(struct ib_block_iter *biter)
+{
+       return __rdma_block_iter_next(biter) && biter->__sg_numblocks--;
 }
 
 /**
@@ -92,7 +99,7 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
  */
 #define rdma_umem_for_each_dma_block(umem, biter, pgsz)                        \
        for (__rdma_umem_block_iter_start(biter, umem, pgsz);                  \
-            __rdma_block_iter_next(biter);)
+            __rdma_umem_block_iter_next(biter);)
 
 #ifdef CONFIG_INFINIBAND_USER_MEM
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index fb1a2d6b196900d0b98ca454c4dbd2c97bfb9142..b7b6b58dd3486d98e5d641149b69dcf13e694292 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2850,6 +2850,7 @@ struct ib_block_iter {
        /* internal states */
        struct scatterlist *__sg;       /* sg holding the current aligned block */
        dma_addr_t __dma_addr;          /* unaligned DMA address of this block */
+       size_t __sg_numblocks;          /* ib_umem_num_dma_blocks() */
        unsigned int __sg_nents;        /* number of SG entries */
        unsigned int __sg_advance;      /* number of bytes to advance in sg in next step */
        unsigned int __pg_bit;          /* alignment of current block */
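
For context, the iterator bounded by the new __sg_numblocks field is what drivers use to walk HCA-page-sized DMA blocks of a umem. A minimal, hypothetical consumer (the function name and the pbl destination are illustrative and not part of this patch set) looks roughly like:

	#include <rdma/ib_umem.h>
	#include <rdma/ib_verbs.h>

	/* Copy one DMA address per pgsz-aligned block into a page-buffer list. */
	static void fill_pbl(struct ib_umem *umem, unsigned long pgsz, u64 *pbl)
	{
		struct ib_block_iter biter;
		int i = 0;

		/* With this fix the loop also stops after
		 * ib_umem_num_dma_blocks(umem, pgsz) blocks, so it cannot
		 * over-run when PAGE_SIZE is larger than the HCA page size. */
		rdma_umem_for_each_dma_block(umem, &biter, pgsz)
			pbl[i++] = rdma_block_iter_dma_address(&biter);
	}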