drop a bunch of rdma patches from 6.1
author		Greg Kroah-Hartman <gregkh@linuxfoundation.org>
		Thu, 6 Mar 2025 14:29:56 +0000 (15:29 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
		Thu, 6 Mar 2025 14:29:56 +0000 (15:29 +0100)
Not all were in 6.6 and the fixes for the fixes were getting just too
deep and messy to make it worth it.

27 files changed:
queue-6.1/ib-core-add-support-for-xdr-link-speed.patch [deleted file]
queue-6.1/rdma-mlx-calling-qp-event-handler-in-workqueue-conte.patch [deleted file]
queue-6.1/rdma-mlx5-add-work-to-remove-temporary-entries-from-.patch [deleted file]
queue-6.1/rdma-mlx5-cache-all-user-cacheable-mkeys-on-dereg-mr.patch [deleted file]
queue-6.1/rdma-mlx5-change-check-for-cacheable-mkeys.patch [deleted file]
queue-6.1/rdma-mlx5-change-the-cache-structure-to-an-rb-tree.patch [deleted file]
queue-6.1/rdma-mlx5-check-reg_create-create-for-errors.patch [deleted file]
queue-6.1/rdma-mlx5-don-t-keep-umrable-page_shift-in-cache-ent.patch [deleted file]
queue-6.1/rdma-mlx5-ensure-created-mkeys-always-have-a-populated-rb_key.patch [deleted file]
queue-6.1/rdma-mlx5-fix-ah-static-rate-parsing.patch [deleted file]
queue-6.1/rdma-mlx5-fix-assigning-access-flags-to-cache-mkeys.patch [deleted file]
queue-6.1/rdma-mlx5-fix-counter-update-on-mr-cache-mkey-creation.patch [deleted file]
queue-6.1/rdma-mlx5-fix-mkey-cache-possible-deadlock-on-cleanup.patch [deleted file]
queue-6.1/rdma-mlx5-fix-mkey-cache-wq-flush.patch [deleted file]
queue-6.1/rdma-mlx5-fix-mr-cache-debugfs-error-in-ib-representors-mode.patch [deleted file]
queue-6.1/rdma-mlx5-fix-mr-cache-temp-entries-cleanup.patch [deleted file]
queue-6.1/rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch [deleted file]
queue-6.1/rdma-mlx5-follow-rb_key.ats-when-creating-new-mkeys.patch [deleted file]
queue-6.1/rdma-mlx5-implement-mkeys-management-via-lifo-queue.patch [deleted file]
queue-6.1/rdma-mlx5-introduce-mlx5r_cache_rb_key.patch [deleted file]
queue-6.1/rdma-mlx5-limit-usage-of-over-sized-mkeys-from-the-mr-cache.patch [deleted file]
queue-6.1/rdma-mlx5-reduce-qp-table-exposure.patch [deleted file]
queue-6.1/rdma-mlx5-remove-extra-unlock-on-error-path.patch [deleted file]
queue-6.1/rdma-mlx5-remove-implicit-odp-cache-entry.patch [deleted file]
queue-6.1/rdma-mlx5-remove-not-used-cache-disable-flag.patch [deleted file]
queue-6.1/rdma-mlx5-uncacheable-mkey-has-neither-rb_key-or-cache_ent.patch [deleted file]
queue-6.1/series

diff --git a/queue-6.1/ib-core-add-support-for-xdr-link-speed.patch b/queue-6.1/ib-core-add-support-for-xdr-link-speed.patch
deleted file mode 100644
index 597def3..0000000
+++ /dev/null
@@ -1,131 +0,0 @@
-From 79cad3705d28ff0c133bcd85a9107d0dbbb27e72 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Wed, 20 Sep 2023 13:07:40 +0300
-Subject: IB/core: Add support for XDR link speed
-
-From: Or Har-Toov <ohartoov@nvidia.com>
-
-[ Upstream commit 703289ce43f740b0096724300107df82d008552f ]
-
-Add new IBTA speed XDR, the new rate that was added to Infiniband spec
-as part of XDR and supporting signaling rate of 200Gb.
-
-In order to report that value to rdma-core, add new u32 field to
-query_port response.
-
-Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
-Reviewed-by: Mark Zhang <markzhang@nvidia.com>
-Link: https://lore.kernel.org/r/9d235fc600a999e8274010f0e18b40fa60540e6c.1695204156.git.leon@kernel.org
-Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
-Signed-off-by: Leon Romanovsky <leon@kernel.org>
-Stable-dep-of: c534ffda781f ("RDMA/mlx5: Fix AH static rate parsing")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/infiniband/core/sysfs.c                   | 4 ++++
- drivers/infiniband/core/uverbs_std_types_device.c | 3 ++-
- drivers/infiniband/core/verbs.c                   | 3 +++
- include/rdma/ib_verbs.h                           | 2 ++
- include/uapi/rdma/ib_user_ioctl_verbs.h           | 3 ++-
- 5 files changed, 13 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
-index ec5efdc166601..9f97bef021497 100644
---- a/drivers/infiniband/core/sysfs.c
-+++ b/drivers/infiniband/core/sysfs.c
-@@ -342,6 +342,10 @@ static ssize_t rate_show(struct ib_device *ibdev, u32 port_num,
-               speed = " NDR";
-               rate = 1000;
-               break;
-+      case IB_SPEED_XDR:
-+              speed = " XDR";
-+              rate = 2000;
-+              break;
-       case IB_SPEED_SDR:
-       default:                /* default to SDR for invalid rates */
-               speed = " SDR";
-diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
-index 049684880ae03..fb0555647336f 100644
---- a/drivers/infiniband/core/uverbs_std_types_device.c
-+++ b/drivers/infiniband/core/uverbs_std_types_device.c
-@@ -203,6 +203,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)(
-       copy_port_attr_to_resp(&attr, &resp.legacy_resp, ib_dev, port_num);
-       resp.port_cap_flags2 = attr.port_cap_flags2;
-+      resp.active_speed_ex = attr.active_speed;
-       return uverbs_copy_to_struct_or_zero(attrs, UVERBS_ATTR_QUERY_PORT_RESP,
-                                            &resp, sizeof(resp));
-@@ -461,7 +462,7 @@ DECLARE_UVERBS_NAMED_METHOD(
-       UVERBS_ATTR_PTR_OUT(
-               UVERBS_ATTR_QUERY_PORT_RESP,
-               UVERBS_ATTR_STRUCT(struct ib_uverbs_query_port_resp_ex,
--                                 reserved),
-+                                 active_speed_ex),
-               UA_MANDATORY));
- DECLARE_UVERBS_NAMED_METHOD(
-diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
-index b99b3cc283b65..90848546f1704 100644
---- a/drivers/infiniband/core/verbs.c
-+++ b/drivers/infiniband/core/verbs.c
-@@ -147,6 +147,7 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
-       case IB_RATE_50_GBPS:  return  20;
-       case IB_RATE_400_GBPS: return 160;
-       case IB_RATE_600_GBPS: return 240;
-+      case IB_RATE_800_GBPS: return 320;
-       default:               return  -1;
-       }
- }
-@@ -176,6 +177,7 @@ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
-       case 20:  return IB_RATE_50_GBPS;
-       case 160: return IB_RATE_400_GBPS;
-       case 240: return IB_RATE_600_GBPS;
-+      case 320: return IB_RATE_800_GBPS;
-       default:  return IB_RATE_PORT_CURRENT;
-       }
- }
-@@ -205,6 +207,7 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
-       case IB_RATE_50_GBPS:  return 53125;
-       case IB_RATE_400_GBPS: return 425000;
-       case IB_RATE_600_GBPS: return 637500;
-+      case IB_RATE_800_GBPS: return 850000;
-       default:               return -1;
-       }
- }
-diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
-index 68fd6d22adfd4..750effb875783 100644
---- a/include/rdma/ib_verbs.h
-+++ b/include/rdma/ib_verbs.h
-@@ -557,6 +557,7 @@ enum ib_port_speed {
-       IB_SPEED_EDR    = 32,
-       IB_SPEED_HDR    = 64,
-       IB_SPEED_NDR    = 128,
-+      IB_SPEED_XDR    = 256,
- };
- enum ib_stat_flag {
-@@ -836,6 +837,7 @@ enum ib_rate {
-       IB_RATE_50_GBPS  = 20,
-       IB_RATE_400_GBPS = 21,
-       IB_RATE_600_GBPS = 22,
-+      IB_RATE_800_GBPS = 23,
- };
- /**
-diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
-index 7dd56210226f5..125fb9f0ef4ab 100644
---- a/include/uapi/rdma/ib_user_ioctl_verbs.h
-+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
-@@ -218,7 +218,8 @@ enum ib_uverbs_advise_mr_flag {
- struct ib_uverbs_query_port_resp_ex {
-       struct ib_uverbs_query_port_resp legacy_resp;
-       __u16 port_cap_flags2;
--      __u8  reserved[6];
-+      __u8  reserved[2];
-+      __u32 active_speed_ex;
- };
- struct ib_uverbs_qp_cap {
--- 
-2.39.5
-
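For scale, the values the deleted patch above introduces fit together as follows: XDR signals 200 Gb/s per lane, so a typical 4x link runs at 800 Gb/s, which is why IB_RATE_800_GBPS and a multiplier of 320 are added alongside IB_SPEED_XDR (ib_rate_to_mult() counts in 2.5 Gb/s SDR units, and 320 * 2.5 = 800). A minimal standalone sketch of that arithmetic, illustrative only and not a kernel function:

	#include <stdio.h>

	int main(void)
	{
		const int xdr_lane_gbps = 200;          /* XDR per-lane signaling rate */
		const int width = 4;                    /* typical 4x link */
		int link_gbps = xdr_lane_gbps * width;  /* 800 Gb/s */
		int mult = link_gbps * 10 / 25;         /* 2.5 Gb/s units -> 320 */

		printf("XDR %dx link: %d Gb/s, rate multiplier %d\n",
		       width, link_gbps, mult);
		return 0;
	}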
diff --git a/queue-6.1/rdma-mlx-calling-qp-event-handler-in-workqueue-conte.patch b/queue-6.1/rdma-mlx-calling-qp-event-handler-in-workqueue-conte.patch
deleted file mode 100644
index 70a6ed3..0000000
+++ /dev/null
@@ -1,502 +0,0 @@
-From d5eccf1fd4fbdb90e3f1aba4e5ba5928ea3163c2 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Wed, 4 Jan 2023 11:43:34 +0200
-Subject: RDMA/mlx: Calling qp event handler in workqueue context
-
-From: Mark Zhang <markzhang@nvidia.com>
-
-[ Upstream commit 312b8f79eb05479628ee71357749815b2eeeeea8 ]
-
-Move the call of qp event handler from atomic to workqueue context,
-so that the handler is able to block. This is needed by following
-patches.
-
-Signed-off-by: Mark Zhang <markzhang@nvidia.com>
-Reviewed-by: Patrisious Haddad <phaddad@nvidia.com>
-Link: https://lore.kernel.org/r/0cd17b8331e445f03942f4bb28d447f24ac5669d.1672821186.git.leonro@nvidia.com
-Signed-off-by: Leon Romanovsky <leon@kernel.org>
-Stable-dep-of: c534ffda781f ("RDMA/mlx5: Fix AH static rate parsing")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/infiniband/hw/mlx4/main.c       |   8 ++
- drivers/infiniband/hw/mlx4/mlx4_ib.h    |   3 +
- drivers/infiniband/hw/mlx4/qp.c         | 121 +++++++++++++++++-------
- drivers/infiniband/hw/mlx5/main.c       |   7 ++
- drivers/infiniband/hw/mlx5/qp.c         | 119 ++++++++++++++++-------
- drivers/infiniband/hw/mlx5/qp.h         |   2 +
- drivers/infiniband/hw/mlx5/qpc.c        |   3 +-
- drivers/net/ethernet/mellanox/mlx4/qp.c |  14 ++-
- include/linux/mlx4/qp.h                 |   1 +
- include/rdma/ib_verbs.h                 |   2 +-
- 10 files changed, 202 insertions(+), 78 deletions(-)
-
-diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
-index 7c3dc86ab7f04..0f0b130cc8aac 100644
---- a/drivers/infiniband/hw/mlx4/main.c
-+++ b/drivers/infiniband/hw/mlx4/main.c
-@@ -3307,6 +3307,10 @@ static int __init mlx4_ib_init(void)
-       if (!wq)
-               return -ENOMEM;
-+      err = mlx4_ib_qp_event_init();
-+      if (err)
-+              goto clean_qp_event;
-+
-       err = mlx4_ib_cm_init();
-       if (err)
-               goto clean_wq;
-@@ -3328,6 +3332,9 @@ static int __init mlx4_ib_init(void)
-       mlx4_ib_cm_destroy();
- clean_wq:
-+      mlx4_ib_qp_event_cleanup();
-+
-+clean_qp_event:
-       destroy_workqueue(wq);
-       return err;
- }
-@@ -3337,6 +3344,7 @@ static void __exit mlx4_ib_cleanup(void)
-       mlx4_unregister_interface(&mlx4_ib_interface);
-       mlx4_ib_mcg_destroy();
-       mlx4_ib_cm_destroy();
-+      mlx4_ib_qp_event_cleanup();
-       destroy_workqueue(wq);
- }
-diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
-index 6a3b0f121045e..17fee1e73a45a 100644
---- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
-+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
-@@ -940,4 +940,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
- int mlx4_ib_cm_init(void);
- void mlx4_ib_cm_destroy(void);
-+int mlx4_ib_qp_event_init(void);
-+void mlx4_ib_qp_event_cleanup(void);
-+
- #endif /* MLX4_IB_H */
-diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
-index ac479e81ddee8..9d08aa99f3cb0 100644
---- a/drivers/infiniband/hw/mlx4/qp.c
-+++ b/drivers/infiniband/hw/mlx4/qp.c
-@@ -102,6 +102,14 @@ enum mlx4_ib_source_type {
-       MLX4_IB_RWQ_SRC = 1,
- };
-+struct mlx4_ib_qp_event_work {
-+      struct work_struct work;
-+      struct mlx4_qp *qp;
-+      enum mlx4_event type;
-+};
-+
-+static struct workqueue_struct *mlx4_ib_qp_event_wq;
-+
- static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
- {
-       if (!mlx4_is_master(dev->dev))
-@@ -200,50 +208,77 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
-       }
- }
-+static void mlx4_ib_handle_qp_event(struct work_struct *_work)
-+{
-+      struct mlx4_ib_qp_event_work *qpe_work =
-+              container_of(_work, struct mlx4_ib_qp_event_work, work);
-+      struct ib_qp *ibqp = &to_mibqp(qpe_work->qp)->ibqp;
-+      struct ib_event event = {};
-+
-+      event.device = ibqp->device;
-+      event.element.qp = ibqp;
-+
-+      switch (qpe_work->type) {
-+      case MLX4_EVENT_TYPE_PATH_MIG:
-+              event.event = IB_EVENT_PATH_MIG;
-+              break;
-+      case MLX4_EVENT_TYPE_COMM_EST:
-+              event.event = IB_EVENT_COMM_EST;
-+              break;
-+      case MLX4_EVENT_TYPE_SQ_DRAINED:
-+              event.event = IB_EVENT_SQ_DRAINED;
-+              break;
-+      case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
-+              event.event = IB_EVENT_QP_LAST_WQE_REACHED;
-+              break;
-+      case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
-+              event.event = IB_EVENT_QP_FATAL;
-+              break;
-+      case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
-+              event.event = IB_EVENT_PATH_MIG_ERR;
-+              break;
-+      case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
-+              event.event = IB_EVENT_QP_REQ_ERR;
-+              break;
-+      case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
-+              event.event = IB_EVENT_QP_ACCESS_ERR;
-+              break;
-+      default:
-+              pr_warn("Unexpected event type %d on QP %06x\n",
-+                      qpe_work->type, qpe_work->qp->qpn);
-+              goto out;
-+      }
-+
-+      ibqp->event_handler(&event, ibqp->qp_context);
-+
-+out:
-+      mlx4_put_qp(qpe_work->qp);
-+      kfree(qpe_work);
-+}
-+
- static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
- {
--      struct ib_event event;
-       struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
-+      struct mlx4_ib_qp_event_work *qpe_work;
-       if (type == MLX4_EVENT_TYPE_PATH_MIG)
-               to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
--      if (ibqp->event_handler) {
--              event.device     = ibqp->device;
--              event.element.qp = ibqp;
--              switch (type) {
--              case MLX4_EVENT_TYPE_PATH_MIG:
--                      event.event = IB_EVENT_PATH_MIG;
--                      break;
--              case MLX4_EVENT_TYPE_COMM_EST:
--                      event.event = IB_EVENT_COMM_EST;
--                      break;
--              case MLX4_EVENT_TYPE_SQ_DRAINED:
--                      event.event = IB_EVENT_SQ_DRAINED;
--                      break;
--              case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
--                      event.event = IB_EVENT_QP_LAST_WQE_REACHED;
--                      break;
--              case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
--                      event.event = IB_EVENT_QP_FATAL;
--                      break;
--              case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
--                      event.event = IB_EVENT_PATH_MIG_ERR;
--                      break;
--              case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
--                      event.event = IB_EVENT_QP_REQ_ERR;
--                      break;
--              case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
--                      event.event = IB_EVENT_QP_ACCESS_ERR;
--                      break;
--              default:
--                      pr_warn("Unexpected event type %d "
--                             "on QP %06x\n", type, qp->qpn);
--                      return;
--              }
-+      if (!ibqp->event_handler)
-+              goto out_no_handler;
--              ibqp->event_handler(&event, ibqp->qp_context);
--      }
-+      qpe_work = kzalloc(sizeof(*qpe_work), GFP_ATOMIC);
-+      if (!qpe_work)
-+              goto out_no_handler;
-+
-+      qpe_work->qp = qp;
-+      qpe_work->type = type;
-+      INIT_WORK(&qpe_work->work, mlx4_ib_handle_qp_event);
-+      queue_work(mlx4_ib_qp_event_wq, &qpe_work->work);
-+      return;
-+
-+out_no_handler:
-+      mlx4_put_qp(qp);
- }
- static void mlx4_ib_wq_event(struct mlx4_qp *qp, enum mlx4_event type)
-@@ -4472,3 +4507,17 @@ void mlx4_ib_drain_rq(struct ib_qp *qp)
-       handle_drain_completion(cq, &rdrain, dev);
- }
-+
-+int mlx4_ib_qp_event_init(void)
-+{
-+      mlx4_ib_qp_event_wq = alloc_ordered_workqueue("mlx4_ib_qp_event_wq", 0);
-+      if (!mlx4_ib_qp_event_wq)
-+              return -ENOMEM;
-+
-+      return 0;
-+}
-+
-+void mlx4_ib_qp_event_cleanup(void)
-+{
-+      destroy_workqueue(mlx4_ib_qp_event_wq);
-+}
-diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
-index 45a414e8d35fa..a22649617e017 100644
---- a/drivers/infiniband/hw/mlx5/main.c
-+++ b/drivers/infiniband/hw/mlx5/main.c
-@@ -4410,6 +4410,10 @@ static int __init mlx5_ib_init(void)
-               return -ENOMEM;
-       }
-+      ret = mlx5_ib_qp_event_init();
-+      if (ret)
-+              goto qp_event_err;
-+
-       mlx5_ib_odp_init();
-       ret = mlx5r_rep_init();
-       if (ret)
-@@ -4427,6 +4431,8 @@ static int __init mlx5_ib_init(void)
- mp_err:
-       mlx5r_rep_cleanup();
- rep_err:
-+      mlx5_ib_qp_event_cleanup();
-+qp_event_err:
-       destroy_workqueue(mlx5_ib_event_wq);
-       free_page((unsigned long)xlt_emergency_page);
-       return ret;
-@@ -4438,6 +4444,7 @@ static void __exit mlx5_ib_cleanup(void)
-       auxiliary_driver_unregister(&mlx5r_mp_driver);
-       mlx5r_rep_cleanup();
-+      mlx5_ib_qp_event_cleanup();
-       destroy_workqueue(mlx5_ib_event_wq);
-       free_page((unsigned long)xlt_emergency_page);
- }
-diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
-index d782a494abcda..43c0123babd10 100644
---- a/drivers/infiniband/hw/mlx5/qp.c
-+++ b/drivers/infiniband/hw/mlx5/qp.c
-@@ -71,6 +71,14 @@ struct mlx5_modify_raw_qp_param {
-       u32 port;
- };
-+struct mlx5_ib_qp_event_work {
-+      struct work_struct work;
-+      struct mlx5_core_qp *qp;
-+      int type;
-+};
-+
-+static struct workqueue_struct *mlx5_ib_qp_event_wq;
-+
- static void get_cqs(enum ib_qp_type qp_type,
-                   struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
-                   struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq);
-@@ -302,51 +310,78 @@ int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer,
-       return mlx5_ib_read_user_wqe_srq(srq, wqe_index, buffer, buflen, bc);
- }
-+static void mlx5_ib_handle_qp_event(struct work_struct *_work)
-+{
-+      struct mlx5_ib_qp_event_work *qpe_work =
-+              container_of(_work, struct mlx5_ib_qp_event_work, work);
-+      struct ib_qp *ibqp = &to_mibqp(qpe_work->qp)->ibqp;
-+      struct ib_event event = {};
-+
-+      event.device = ibqp->device;
-+      event.element.qp = ibqp;
-+      switch (qpe_work->type) {
-+      case MLX5_EVENT_TYPE_PATH_MIG:
-+              event.event = IB_EVENT_PATH_MIG;
-+              break;
-+      case MLX5_EVENT_TYPE_COMM_EST:
-+              event.event = IB_EVENT_COMM_EST;
-+              break;
-+      case MLX5_EVENT_TYPE_SQ_DRAINED:
-+              event.event = IB_EVENT_SQ_DRAINED;
-+              break;
-+      case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
-+              event.event = IB_EVENT_QP_LAST_WQE_REACHED;
-+              break;
-+      case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
-+              event.event = IB_EVENT_QP_FATAL;
-+              break;
-+      case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
-+              event.event = IB_EVENT_PATH_MIG_ERR;
-+              break;
-+      case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
-+              event.event = IB_EVENT_QP_REQ_ERR;
-+              break;
-+      case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
-+              event.event = IB_EVENT_QP_ACCESS_ERR;
-+              break;
-+      default:
-+              pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n",
-+                      qpe_work->type, qpe_work->qp->qpn);
-+              goto out;
-+      }
-+
-+      ibqp->event_handler(&event, ibqp->qp_context);
-+
-+out:
-+      mlx5_core_res_put(&qpe_work->qp->common);
-+      kfree(qpe_work);
-+}
-+
- static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
- {
-       struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
--      struct ib_event event;
-+      struct mlx5_ib_qp_event_work *qpe_work;
-       if (type == MLX5_EVENT_TYPE_PATH_MIG) {
-               /* This event is only valid for trans_qps */
-               to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
-       }
--      if (ibqp->event_handler) {
--              event.device     = ibqp->device;
--              event.element.qp = ibqp;
--              switch (type) {
--              case MLX5_EVENT_TYPE_PATH_MIG:
--                      event.event = IB_EVENT_PATH_MIG;
--                      break;
--              case MLX5_EVENT_TYPE_COMM_EST:
--                      event.event = IB_EVENT_COMM_EST;
--                      break;
--              case MLX5_EVENT_TYPE_SQ_DRAINED:
--                      event.event = IB_EVENT_SQ_DRAINED;
--                      break;
--              case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
--                      event.event = IB_EVENT_QP_LAST_WQE_REACHED;
--                      break;
--              case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
--                      event.event = IB_EVENT_QP_FATAL;
--                      break;
--              case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
--                      event.event = IB_EVENT_PATH_MIG_ERR;
--                      break;
--              case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
--                      event.event = IB_EVENT_QP_REQ_ERR;
--                      break;
--              case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
--                      event.event = IB_EVENT_QP_ACCESS_ERR;
--                      break;
--              default:
--                      pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn);
--                      return;
--              }
-+      if (!ibqp->event_handler)
-+              goto out_no_handler;
--              ibqp->event_handler(&event, ibqp->qp_context);
--      }
-+      qpe_work = kzalloc(sizeof(*qpe_work), GFP_ATOMIC);
-+      if (!qpe_work)
-+              goto out_no_handler;
-+
-+      qpe_work->qp = qp;
-+      qpe_work->type = type;
-+      INIT_WORK(&qpe_work->work, mlx5_ib_handle_qp_event);
-+      queue_work(mlx5_ib_qp_event_wq, &qpe_work->work);
-+      return;
-+
-+out_no_handler:
-+      mlx5_core_res_put(&qp->common);
- }
- static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
-@@ -5752,3 +5787,17 @@ int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter)
-       mutex_unlock(&mqp->mutex);
-       return err;
- }
-+
-+int mlx5_ib_qp_event_init(void)
-+{
-+      mlx5_ib_qp_event_wq = alloc_ordered_workqueue("mlx5_ib_qp_event_wq", 0);
-+      if (!mlx5_ib_qp_event_wq)
-+              return -ENOMEM;
-+
-+      return 0;
-+}
-+
-+void mlx5_ib_qp_event_cleanup(void)
-+{
-+      destroy_workqueue(mlx5_ib_qp_event_wq);
-+}
-diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h
-index 5d4e140db99ce..fb2f4e030bb8f 100644
---- a/drivers/infiniband/hw/mlx5/qp.h
-+++ b/drivers/infiniband/hw/mlx5/qp.h
-@@ -44,4 +44,6 @@ void mlx5_core_res_put(struct mlx5_core_rsc_common *res);
- int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn);
- int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn);
- int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
-+int mlx5_ib_qp_event_init(void);
-+void mlx5_ib_qp_event_cleanup(void);
- #endif /* _MLX5_IB_QP_H */
-diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c
-index d4e7864c56f18..a824ff22f4615 100644
---- a/drivers/infiniband/hw/mlx5/qpc.c
-+++ b/drivers/infiniband/hw/mlx5/qpc.c
-@@ -135,7 +135,8 @@ static int rsc_event_notifier(struct notifier_block *nb,
-       case MLX5_RES_SQ:
-               qp = (struct mlx5_core_qp *)common;
-               qp->event(qp, event_type);
--              break;
-+              /* Need to put resource in event handler */
-+              return NOTIFY_OK;
-       case MLX5_RES_DCT:
-               dct = (struct mlx5_core_dct *)common;
-               if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED)
-diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
-index 48cfaa7eaf50c..913ed255990f4 100644
---- a/drivers/net/ethernet/mellanox/mlx4/qp.c
-+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
-@@ -46,6 +46,13 @@
- #define MLX4_BF_QP_SKIP_MASK  0xc0
- #define MLX4_MAX_BF_QP_RANGE  0x40
-+void mlx4_put_qp(struct mlx4_qp *qp)
-+{
-+      if (refcount_dec_and_test(&qp->refcount))
-+              complete(&qp->free);
-+}
-+EXPORT_SYMBOL_GPL(mlx4_put_qp);
-+
- void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
- {
-       struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
-@@ -64,10 +71,8 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
-               return;
-       }
-+      /* Need to call mlx4_put_qp() in event handler */
-       qp->event(qp, event_type);
--
--      if (refcount_dec_and_test(&qp->refcount))
--              complete(&qp->free);
- }
- /* used for INIT/CLOSE port logic */
-@@ -523,8 +528,7 @@ EXPORT_SYMBOL_GPL(mlx4_qp_remove);
- void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp)
- {
--      if (refcount_dec_and_test(&qp->refcount))
--              complete(&qp->free);
-+      mlx4_put_qp(qp);
-       wait_for_completion(&qp->free);
-       mlx4_qp_free_icm(dev, qp->qpn);
-diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
-index b6b626157b03a..b9a7b1319f5d3 100644
---- a/include/linux/mlx4/qp.h
-+++ b/include/linux/mlx4/qp.h
-@@ -504,4 +504,5 @@ static inline u16 folded_qp(u32 q)
- u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn);
-+void mlx4_put_qp(struct mlx4_qp *qp);
- #endif /* MLX4_QP_H */
-diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
-index 5582509003264..68fd6d22adfd4 100644
---- a/include/rdma/ib_verbs.h
-+++ b/include/rdma/ib_verbs.h
-@@ -1162,7 +1162,7 @@ enum ib_qp_create_flags {
-  */
- struct ib_qp_init_attr {
--      /* Consumer's event_handler callback must not block */
-+      /* This callback occurs in workqueue context */
-       void                  (*event_handler)(struct ib_event *, void *);
-       void                   *qp_context;
--- 
-2.39.5
-
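Condensed, the deferral pattern the deleted patch above introduces looks like the kernel-style sketch below; some_qp, put_qp(), dispatch_to_consumer() and qp_event_wq stand in for the driver-specific pieces. The two essentials are that the work item is allocated with GFP_ATOMIC (the event still arrives in atomic context) and that the reference taken for the event is dropped exactly once, whether or not a handler runs:

	static struct workqueue_struct *qp_event_wq;

	struct qp_event_work {
		struct work_struct work;
		struct some_qp *qp;     /* holds the event's reference */
		int type;
	};

	static void handle_qp_event(struct work_struct *w)
	{
		struct qp_event_work *ew =
			container_of(w, struct qp_event_work, work);

		dispatch_to_consumer(ew->qp, ew->type);  /* may sleep now */
		put_qp(ew->qp);                          /* drop the event's reference */
		kfree(ew);
	}

	static void qp_event(struct some_qp *qp, int type)  /* atomic context */
	{
		struct qp_event_work *ew;

		ew = kzalloc(sizeof(*ew), GFP_ATOMIC);
		if (!ew) {
			put_qp(qp);     /* the reference must still be released */
			return;
		}
		ew->qp = qp;
		ew->type = type;
		INIT_WORK(&ew->work, handle_qp_event);
		queue_work(qp_event_wq, &ew->work);
	}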
diff --git a/queue-6.1/rdma-mlx5-add-work-to-remove-temporary-entries-from-.patch b/queue-6.1/rdma-mlx5-add-work-to-remove-temporary-entries-from-.patch
deleted file mode 100644
index 2eba6e0..0000000
+++ /dev/null
@@ -1,315 +0,0 @@
-From be147ad5b5dbf2b210768ce67d652ae3e1d6ddf1 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Thu, 26 Jan 2023 00:28:07 +0200
-Subject: RDMA/mlx5: Add work to remove temporary entries from the cache
-
-From: Michael Guralnik <michaelgur@nvidia.com>
-
-[ Upstream commit 627122280c878cf5d3cda2d2c5a0a8f6a7e35cb7 ]
-
-The non-cache mkeys are stored in the cache only to shorten restarting
-application time. Don't store them longer than needed.
-
-Configure cache entries that store non-cache MRs as temporary entries.  If
-30 seconds have passed and no user reclaimed the temporarily cached mkeys,
-an asynchronous work will destroy the mkeys entries.
-
-Link: https://lore.kernel.org/r/20230125222807.6921-7-michaelgur@nvidia.com
-Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
-Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-Stable-dep-of: d97505baea64 ("RDMA/mlx5: Fix the recovery flow of the UMR QP")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/infiniband/hw/mlx5/mlx5_ib.h |  9 ++-
- drivers/infiniband/hw/mlx5/mr.c      | 94 ++++++++++++++++++++++------
- drivers/infiniband/hw/mlx5/odp.c     |  2 +-
- 3 files changed, 82 insertions(+), 23 deletions(-)
-
-diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-index f345e2ae394d2..7c72e0e9db54a 100644
---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
-+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-@@ -770,6 +770,7 @@ struct mlx5_cache_ent {
-       struct rb_node          node;
-       struct mlx5r_cache_rb_key rb_key;
-+      u8 is_tmp:1;
-       u8 disabled:1;
-       u8 fill_to_high_water:1;
-@@ -803,6 +804,7 @@ struct mlx5_mkey_cache {
-       struct mutex            rb_lock;
-       struct dentry           *fs_root;
-       unsigned long           last_add;
-+      struct delayed_work     remove_ent_dwork;
- };
- struct mlx5_ib_port_resources {
-@@ -1346,9 +1348,10 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
- int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
- int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
- int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
--struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
--                                            struct mlx5r_cache_rb_key rb_key,
--                                            bool persistent_entry);
-+struct mlx5_cache_ent *
-+mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
-+                            struct mlx5r_cache_rb_key rb_key,
-+                            bool persistent_entry);
- struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
-                                      int access_flags, int access_mode,
-diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
-index bf1ca7565be67..2c1a935734273 100644
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -140,19 +140,16 @@ static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out)
-       mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out);
- }
--
--static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
--                   void *to_store)
-+static int push_mkey_locked(struct mlx5_cache_ent *ent, bool limit_pendings,
-+                          void *to_store)
- {
-       XA_STATE(xas, &ent->mkeys, 0);
-       void *curr;
--      xa_lock_irq(&ent->mkeys);
-       if (limit_pendings &&
--          (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) {
--              xa_unlock_irq(&ent->mkeys);
-+          (ent->reserved - ent->stored) > MAX_PENDING_REG_MR)
-               return -EAGAIN;
--      }
-+
-       while (1) {
-               /*
-                * This is cmpxchg (NULL, XA_ZERO_ENTRY) however this version
-@@ -191,6 +188,7 @@ static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
-                       break;
-               xa_lock_irq(&ent->mkeys);
-       }
-+      xa_lock_irq(&ent->mkeys);
-       if (xas_error(&xas))
-               return xas_error(&xas);
-       if (WARN_ON(curr))
-@@ -198,6 +196,17 @@ static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
-       return 0;
- }
-+static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
-+                   void *to_store)
-+{
-+      int ret;
-+
-+      xa_lock_irq(&ent->mkeys);
-+      ret = push_mkey_locked(ent, limit_pendings, to_store);
-+      xa_unlock_irq(&ent->mkeys);
-+      return ret;
-+}
-+
- static void undo_push_reserve_mkey(struct mlx5_cache_ent *ent)
- {
-       void *old;
-@@ -545,7 +554,7 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
- {
-       lockdep_assert_held(&ent->mkeys.xa_lock);
--      if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
-+      if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp)
-               return;
-       if (ent->stored < ent->limit) {
-               ent->fill_to_high_water = true;
-@@ -675,7 +684,6 @@ static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
-       struct mlx5_cache_ent *cur;
-       int cmp;
--      mutex_lock(&cache->rb_lock);
-       /* Figure out where to put new node */
-       while (*new) {
-               cur = rb_entry(*new, struct mlx5_cache_ent, node);
-@@ -695,7 +703,6 @@ static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
-       rb_link_node(&ent->node, parent, new);
-       rb_insert_color(&ent->node, &cache->rb_root);
--      mutex_unlock(&cache->rb_lock);
-       return 0;
- }
-@@ -867,9 +874,10 @@ static void delay_time_func(struct timer_list *t)
-       WRITE_ONCE(dev->fill_delay, 0);
- }
--struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
--                                            struct mlx5r_cache_rb_key rb_key,
--                                            bool persistent_entry)
-+struct mlx5_cache_ent *
-+mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
-+                            struct mlx5r_cache_rb_key rb_key,
-+                            bool persistent_entry)
- {
-       struct mlx5_cache_ent *ent;
-       int order;
-@@ -882,6 +890,7 @@ struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
-       xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
-       ent->rb_key = rb_key;
-       ent->dev = dev;
-+      ent->is_tmp = !persistent_entry;
-       INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
-@@ -905,11 +914,44 @@ struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
-                       ent->limit = 0;
-               mlx5_mkey_cache_debugfs_add_ent(dev, ent);
-+      } else {
-+              mod_delayed_work(ent->dev->cache.wq,
-+                               &ent->dev->cache.remove_ent_dwork,
-+                               msecs_to_jiffies(30 * 1000));
-       }
-       return ent;
- }
-+static void remove_ent_work_func(struct work_struct *work)
-+{
-+      struct mlx5_mkey_cache *cache;
-+      struct mlx5_cache_ent *ent;
-+      struct rb_node *cur;
-+
-+      cache = container_of(work, struct mlx5_mkey_cache,
-+                           remove_ent_dwork.work);
-+      mutex_lock(&cache->rb_lock);
-+      cur = rb_last(&cache->rb_root);
-+      while (cur) {
-+              ent = rb_entry(cur, struct mlx5_cache_ent, node);
-+              cur = rb_prev(cur);
-+              mutex_unlock(&cache->rb_lock);
-+
-+              xa_lock_irq(&ent->mkeys);
-+              if (!ent->is_tmp) {
-+                      xa_unlock_irq(&ent->mkeys);
-+                      mutex_lock(&cache->rb_lock);
-+                      continue;
-+              }
-+              xa_unlock_irq(&ent->mkeys);
-+
-+              clean_keys(ent->dev, ent);
-+              mutex_lock(&cache->rb_lock);
-+      }
-+      mutex_unlock(&cache->rb_lock);
-+}
-+
- int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
- {
-       struct mlx5_mkey_cache *cache = &dev->cache;
-@@ -925,6 +967,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
-       mutex_init(&dev->slow_path_mutex);
-       mutex_init(&dev->cache.rb_lock);
-       dev->cache.rb_root = RB_ROOT;
-+      INIT_DELAYED_WORK(&dev->cache.remove_ent_dwork, remove_ent_work_func);
-       cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
-       if (!cache->wq) {
-               mlx5_ib_warn(dev, "failed to create work queue\n");
-@@ -934,9 +977,10 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
-       mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
-       timer_setup(&dev->delay_timer, delay_time_func, 0);
-       mlx5_mkey_cache_debugfs_init(dev);
-+      mutex_lock(&cache->rb_lock);
-       for (i = 0; i <= mkey_cache_max_order(dev); i++) {
-               rb_key.ndescs = 1 << (i + 2);
--              ent = mlx5r_cache_create_ent(dev, rb_key, true);
-+              ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
-               if (IS_ERR(ent)) {
-                       ret = PTR_ERR(ent);
-                       goto err;
-@@ -947,6 +991,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
-       if (ret)
-               goto err;
-+      mutex_unlock(&cache->rb_lock);
-       for (node = rb_first(root); node; node = rb_next(node)) {
-               ent = rb_entry(node, struct mlx5_cache_ent, node);
-               xa_lock_irq(&ent->mkeys);
-@@ -957,6 +1002,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
-       return 0;
- err:
-+      mutex_unlock(&cache->rb_lock);
-       mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
-       return ret;
- }
-@@ -970,6 +1016,7 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
-       if (!dev->cache.wq)
-               return 0;
-+      cancel_delayed_work_sync(&dev->cache.remove_ent_dwork);
-       mutex_lock(&dev->cache.rb_lock);
-       for (node = rb_first(root); node; node = rb_next(node)) {
-               ent = rb_entry(node, struct mlx5_cache_ent, node);
-@@ -1752,33 +1799,42 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
- {
-       struct mlx5_mkey_cache *cache = &dev->cache;
-       struct mlx5_cache_ent *ent;
-+      int ret;
-       if (mr->mmkey.cache_ent) {
-               xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
-               mr->mmkey.cache_ent->in_use--;
--              xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
-               goto end;
-       }
-       mutex_lock(&cache->rb_lock);
-       ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key);
--      mutex_unlock(&cache->rb_lock);
-       if (ent) {
-               if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) {
-+                      if (ent->disabled) {
-+                              mutex_unlock(&cache->rb_lock);
-+                              return -EOPNOTSUPP;
-+                      }
-                       mr->mmkey.cache_ent = ent;
-+                      xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
-+                      mutex_unlock(&cache->rb_lock);
-                       goto end;
-               }
-       }
--      ent = mlx5r_cache_create_ent(dev, mr->mmkey.rb_key, false);
-+      ent = mlx5r_cache_create_ent_locked(dev, mr->mmkey.rb_key, false);
-+      mutex_unlock(&cache->rb_lock);
-       if (IS_ERR(ent))
-               return PTR_ERR(ent);
-       mr->mmkey.cache_ent = ent;
-+      xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
- end:
--      return push_mkey(mr->mmkey.cache_ent, false,
--                       xa_mk_value(mr->mmkey.key));
-+      ret = push_mkey_locked(mr->mmkey.cache_ent, false,
-+                             xa_mk_value(mr->mmkey.key));
-+      xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
-+      return ret;
- }
- int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
-diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
-index 96d4faabbff8a..6ba4aa1afdc2d 100644
---- a/drivers/infiniband/hw/mlx5/odp.c
-+++ b/drivers/infiniband/hw/mlx5/odp.c
-@@ -1602,7 +1602,7 @@ int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
-       if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
-               return 0;
--      ent = mlx5r_cache_create_ent(dev, rb_key, true);
-+      ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
-       if (IS_ERR(ent))
-               return PTR_ERR(ent);
--- 
-2.39.5
-
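The 30-second expiry in the deleted patch above uses one shared delayed work for the whole cache rather than a timer per entry: each time a temporary entry is created, the work is re-armed with mod_delayed_work(), which only moves the expiry of an already-pending work instead of queuing a second instance, so cleanup runs roughly 30 seconds after the most recent temporary entry was added. The arming idiom, as it appears in the patch:

	/* (Re)arm the shared cleanup work; a pending work only has its
	 * timer pushed back, it is never queued twice. */
	mod_delayed_work(ent->dev->cache.wq,
			 &ent->dev->cache.remove_ent_dwork,
			 msecs_to_jiffies(30 * 1000));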
diff --git a/queue-6.1/rdma-mlx5-cache-all-user-cacheable-mkeys-on-dereg-mr.patch b/queue-6.1/rdma-mlx5-cache-all-user-cacheable-mkeys-on-dereg-mr.patch
deleted file mode 100644
index c6c37af..0000000
+++ /dev/null
@@ -1,148 +0,0 @@
-From 5a09f0237455bc487c3d8cb78b82b7263d23d8fe Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Thu, 26 Jan 2023 00:28:06 +0200
-Subject: RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow
-
-From: Michael Guralnik <michaelgur@nvidia.com>
-
-[ Upstream commit dd1b913fb0d0e3e6d55e92d2319d954474dd66ac ]
-
-Currently, when dereging an MR, if the mkey doesn't belong to a cache
-entry, it will be destroyed.  As a result, the restart of applications
-with many non-cached mkeys is not efficient since all the mkeys are
-destroyed and then recreated.  This process takes a long time (for 100,000
-MRs, it is ~20 seconds for dereg and ~28 seconds for re-reg).
-
-To shorten the restart runtime, insert all cacheable mkeys to the cache.
-If there is no fitting entry to the mkey properties, create a temporary
-entry that fits it.
-
-After a predetermined timeout, the cache entries will shrink to the
-initial high limit.
-
-The mkeys will still be in the cache when consuming them again after an
-application restart. Therefore, the registration will be much faster
-(for 100,000 MRs, it is ~4 seconds for dereg and ~5 seconds for re-reg).
-
-The temporary cache entries created to store the non-cache mkeys are not
-exposed through sysfs like the default cache entries.
-
-Link: https://lore.kernel.org/r/20230125222807.6921-6-michaelgur@nvidia.com
-Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
-Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-Stable-dep-of: d97505baea64 ("RDMA/mlx5: Fix the recovery flow of the UMR QP")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/infiniband/hw/mlx5/mlx5_ib.h |  2 +
- drivers/infiniband/hw/mlx5/mr.c      | 55 +++++++++++++++++++++-------
- 2 files changed, 44 insertions(+), 13 deletions(-)
-
-diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-index 7c9d5648947e9..f345e2ae394d2 100644
---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
-+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-@@ -650,6 +650,8 @@ struct mlx5_ib_mkey {
-       unsigned int ndescs;
-       struct wait_queue_head wait;
-       refcount_t usecount;
-+      /* User Mkey must hold either a rb_key or a cache_ent. */
-+      struct mlx5r_cache_rb_key rb_key;
-       struct mlx5_cache_ent *cache_ent;
- };
-diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
-index 1060b30a837a0..bf1ca7565be67 100644
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -1110,15 +1110,14 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
-       rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags);
-       ent = mkey_cache_ent_from_rb_key(dev, rb_key);
-       /*
--       * Matches access in alloc_cache_mr(). If the MR can't come from the
--       * cache then synchronously create an uncached one.
-+       * If the MR can't come from the cache then synchronously create an uncached
-+       * one.
-        */
--      if (!ent || ent->limit == 0 ||
--          !mlx5r_umr_can_reconfig(dev, 0, access_flags) ||
--          mlx5_umem_needs_ats(dev, umem, access_flags)) {
-+      if (!ent) {
-               mutex_lock(&dev->slow_path_mutex);
-               mr = reg_create(pd, umem, iova, access_flags, page_size, false);
-               mutex_unlock(&dev->slow_path_mutex);
-+              mr->mmkey.rb_key = rb_key;
-               return mr;
-       }
-@@ -1209,6 +1208,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
-               goto err_2;
-       }
-       mr->mmkey.type = MLX5_MKEY_MR;
-+      mr->mmkey.ndescs = get_octo_len(iova, umem->length, mr->page_shift);
-       mr->umem = umem;
-       set_mr_fields(dev, mr, umem->length, access_flags, iova);
-       kvfree(in);
-@@ -1747,6 +1747,40 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
-       }
- }
-+static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
-+                                  struct mlx5_ib_mr *mr)
-+{
-+      struct mlx5_mkey_cache *cache = &dev->cache;
-+      struct mlx5_cache_ent *ent;
-+
-+      if (mr->mmkey.cache_ent) {
-+              xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
-+              mr->mmkey.cache_ent->in_use--;
-+              xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
-+              goto end;
-+      }
-+
-+      mutex_lock(&cache->rb_lock);
-+      ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key);
-+      mutex_unlock(&cache->rb_lock);
-+      if (ent) {
-+              if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) {
-+                      mr->mmkey.cache_ent = ent;
-+                      goto end;
-+              }
-+      }
-+
-+      ent = mlx5r_cache_create_ent(dev, mr->mmkey.rb_key, false);
-+      if (IS_ERR(ent))
-+              return PTR_ERR(ent);
-+
-+      mr->mmkey.cache_ent = ent;
-+
-+end:
-+      return push_mkey(mr->mmkey.cache_ent, false,
-+                       xa_mk_value(mr->mmkey.key));
-+}
-+
- int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
- {
-       struct mlx5_ib_mr *mr = to_mmr(ibmr);
-@@ -1792,16 +1826,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
-       }
-       /* Stop DMA */
--      if (mr->mmkey.cache_ent) {
--              xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
--              mr->mmkey.cache_ent->in_use--;
--              xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
--
-+      if (mr->umem && mlx5r_umr_can_load_pas(dev, mr->umem->length))
-               if (mlx5r_umr_revoke_mr(mr) ||
--                  push_mkey(mr->mmkey.cache_ent, false,
--                            xa_mk_value(mr->mmkey.key)))
-+                  cache_ent_find_and_store(dev, mr))
-                       mr->mmkey.cache_ent = NULL;
--      }
-+
-       if (!mr->mmkey.cache_ent) {
-               rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
-               if (rc)
--- 
-2.39.5
-
diff --git a/queue-6.1/rdma-mlx5-change-check-for-cacheable-mkeys.patch b/queue-6.1/rdma-mlx5-change-check-for-cacheable-mkeys.patch
deleted file mode 100644
index c7b2df5..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-From 8c1185fef68cc603b954fece2a434c9f851d6a86 Mon Sep 17 00:00:00 2001
-From: Or Har-Toov <ohartoov@nvidia.com>
-Date: Wed, 3 Apr 2024 13:36:00 +0300
-Subject: RDMA/mlx5: Change check for cacheable mkeys
-
-From: Or Har-Toov <ohartoov@nvidia.com>
-
-commit 8c1185fef68cc603b954fece2a434c9f851d6a86 upstream.
-
-umem can be NULL for user application mkeys in some cases. Therefore
-umem can't be used for checking if the mkey is cacheable and it is
-changed for checking a flag that indicates it. Also make sure that
-all mkeys which are not returned to the cache will be destroyed.
-
-Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
-Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
-Link: https://lore.kernel.org/r/2690bc5c6896bcb937f89af16a1ff0343a7ab3d0.1712140377.git.leon@kernel.org
-Signed-off-by: Leon Romanovsky <leon@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/hw/mlx5/mlx5_ib.h |    1 +
- drivers/infiniband/hw/mlx5/mr.c      |   32 ++++++++++++++++++++++----------
- 2 files changed, 23 insertions(+), 10 deletions(-)
-
---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
-+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-@@ -654,6 +654,7 @@ struct mlx5_ib_mkey {
-       /* Cacheable user Mkey must hold either a rb_key or a cache_ent. */
-       struct mlx5r_cache_rb_key rb_key;
-       struct mlx5_cache_ent *cache_ent;
-+      u8 cacheable : 1;
- };
- #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -1155,6 +1155,7 @@ static struct mlx5_ib_mr *alloc_cacheabl
-               if (IS_ERR(mr))
-                       return mr;
-               mr->mmkey.rb_key = rb_key;
-+              mr->mmkey.cacheable = true;
-               return mr;
-       }
-@@ -1165,6 +1166,7 @@ static struct mlx5_ib_mr *alloc_cacheabl
-       mr->ibmr.pd = pd;
-       mr->umem = umem;
-       mr->page_shift = order_base_2(page_size);
-+      mr->mmkey.cacheable = true;
-       set_mr_fields(dev, mr, umem->length, access_flags, iova);
-       return mr;
-@@ -1830,6 +1832,23 @@ end:
-       return ret;
- }
-+static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
-+{
-+      struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
-+      struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
-+
-+      if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr))
-+              return 0;
-+
-+      if (ent) {
-+              spin_lock_irq(&ent->mkeys_queue.lock);
-+              ent->in_use--;
-+              mr->mmkey.cache_ent = NULL;
-+              spin_unlock_irq(&ent->mkeys_queue.lock);
-+      }
-+      return destroy_mkey(dev, mr);
-+}
-+
- int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
- {
-       struct mlx5_ib_mr *mr = to_mmr(ibmr);
-@@ -1875,16 +1894,9 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr,
-       }
-       /* Stop DMA */
--      if (mr->umem && mlx5r_umr_can_load_pas(dev, mr->umem->length))
--              if (mlx5r_umr_revoke_mr(mr) ||
--                  cache_ent_find_and_store(dev, mr))
--                      mr->mmkey.cache_ent = NULL;
--
--      if (!mr->mmkey.cache_ent) {
--              rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
--              if (rc)
--                      return rc;
--      }
-+      rc = mlx5_revoke_mr(mr);
-+      if (rc)
-+              return rc;
-       if (mr->umem) {
-               bool is_odp = is_odp_mr(mr);
diff --git a/queue-6.1/rdma-mlx5-change-the-cache-structure-to-an-rb-tree.patch b/queue-6.1/rdma-mlx5-change-the-cache-structure-to-an-rb-tree.patch
deleted file mode 100644
index c76604c..0000000
+++ /dev/null
@@ -1,354 +0,0 @@
-From 3a78949c3d99afa32e87cf8cfe46723a057ee4cb Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Thu, 26 Jan 2023 00:28:04 +0200
-Subject: RDMA/mlx5: Change the cache structure to an RB-tree
-
-From: Michael Guralnik <michaelgur@nvidia.com>
-
-[ Upstream commit b9584517832858a0f78d6851d09b697a829514cd ]
-
-Currently, the cache structure is a static linear array. Therefore, his
-size is limited to the number of entries in it and is not expandable.  The
-entries are dedicated to mkeys of size 2^x and no access_flags. Mkeys with
-different properties are not cacheable.
-
-In this patch, we change the cache structure to an RB-tree.  This will
-allow to extend the cache to support more entries with different mkey
-properties.
-
-Link: https://lore.kernel.org/r/20230125222807.6921-4-michaelgur@nvidia.com
-Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
-Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-Stable-dep-of: d97505baea64 ("RDMA/mlx5: Fix the recovery flow of the UMR QP")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/infiniband/hw/mlx5/mlx5_ib.h |  11 +-
- drivers/infiniband/hw/mlx5/mr.c      | 160 ++++++++++++++++++++-------
- drivers/infiniband/hw/mlx5/odp.c     |   8 +-
- 3 files changed, 132 insertions(+), 47 deletions(-)
-
-diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-index 10c87901da27c..bd998ac8c29c1 100644
---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
-+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-@@ -761,6 +761,8 @@ struct mlx5_cache_ent {
-       u32                     access_mode;
-       unsigned int            ndescs;
-+      struct rb_node          node;
-+
-       u8 disabled:1;
-       u8 fill_to_high_water:1;
-@@ -790,8 +792,9 @@ struct mlx5r_async_create_mkey {
- struct mlx5_mkey_cache {
-       struct workqueue_struct *wq;
--      struct mlx5_cache_ent   ent[MAX_MKEY_CACHE_ENTRIES];
--      struct dentry           *root;
-+      struct rb_root          rb_root;
-+      struct mutex            rb_lock;
-+      struct dentry           *fs_root;
-       unsigned long           last_add;
- };
-@@ -1336,11 +1339,15 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
- int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
- int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
- int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
-+struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
-+                                            int order);
- struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
-                                      struct mlx5_cache_ent *ent,
-                                      int access_flags);
-+struct mlx5_ib_mr *mlx5_mr_cache_alloc_order(struct mlx5_ib_dev *dev, u32 order,
-+                                           int access_flags);
- int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
-                           struct ib_mr_status *mr_status);
- struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
-diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
-index 53fadd6edb68d..b3d83920d3cfb 100644
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -515,18 +515,22 @@ static const struct file_operations limit_fops = {
- static bool someone_adding(struct mlx5_mkey_cache *cache)
- {
--      unsigned int i;
--
--      for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
--              struct mlx5_cache_ent *ent = &cache->ent[i];
--              bool ret;
-+      struct mlx5_cache_ent *ent;
-+      struct rb_node *node;
-+      bool ret;
-+      mutex_lock(&cache->rb_lock);
-+      for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) {
-+              ent = rb_entry(node, struct mlx5_cache_ent, node);
-               xa_lock_irq(&ent->mkeys);
-               ret = ent->stored < ent->limit;
-               xa_unlock_irq(&ent->mkeys);
--              if (ret)
-+              if (ret) {
-+                      mutex_unlock(&cache->rb_lock);
-                       return true;
-+              }
-       }
-+      mutex_unlock(&cache->rb_lock);
-       return false;
- }
-@@ -637,6 +641,59 @@ static void delayed_cache_work_func(struct work_struct *work)
-       __cache_work_func(ent);
- }
-+static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
-+                               struct mlx5_cache_ent *ent)
-+{
-+      struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL;
-+      struct mlx5_cache_ent *cur;
-+
-+      mutex_lock(&cache->rb_lock);
-+      /* Figure out where to put new node */
-+      while (*new) {
-+              cur = rb_entry(*new, struct mlx5_cache_ent, node);
-+              parent = *new;
-+              if (ent->order < cur->order)
-+                      new = &((*new)->rb_left);
-+              if (ent->order > cur->order)
-+                      new = &((*new)->rb_right);
-+              if (ent->order == cur->order) {
-+                      mutex_unlock(&cache->rb_lock);
-+                      return -EEXIST;
-+              }
-+      }
-+
-+      /* Add new node and rebalance tree. */
-+      rb_link_node(&ent->node, parent, new);
-+      rb_insert_color(&ent->node, &cache->rb_root);
-+
-+      mutex_unlock(&cache->rb_lock);
-+      return 0;
-+}
-+
-+static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev,
-+                                                      unsigned int order)
-+{
-+      struct rb_node *node = dev->cache.rb_root.rb_node;
-+      struct mlx5_cache_ent *cur, *smallest = NULL;
-+
-+      /*
-+       * Find the smallest ent with order >= requested_order.
-+       */
-+      while (node) {
-+              cur = rb_entry(node, struct mlx5_cache_ent, node);
-+              if (cur->order > order) {
-+                      smallest = cur;
-+                      node = node->rb_left;
-+              }
-+              if (cur->order < order)
-+                      node = node->rb_right;
-+              if (cur->order == order)
-+                      return cur;
-+      }
-+
-+      return smallest;
-+}
-+
- struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
-                                      struct mlx5_cache_ent *ent,
-                                      int access_flags)
-@@ -677,10 +734,16 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
-       return mr;
- }
--static void clean_keys(struct mlx5_ib_dev *dev, int c)
-+struct mlx5_ib_mr *mlx5_mr_cache_alloc_order(struct mlx5_ib_dev *dev,
-+                                           u32 order, int access_flags)
-+{
-+      struct mlx5_cache_ent *ent = mkey_cache_ent_from_order(dev, order);
-+
-+      return mlx5_mr_cache_alloc(dev, ent, access_flags);
-+}
-+
-+static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
- {
--      struct mlx5_mkey_cache *cache = &dev->cache;
--      struct mlx5_cache_ent *ent = &cache->ent[c];
-       u32 mkey;
-       cancel_delayed_work(&ent->dwork);
-@@ -699,8 +762,8 @@ static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
-       if (!mlx5_debugfs_root || dev->is_rep)
-               return;
--      debugfs_remove_recursive(dev->cache.root);
--      dev->cache.root = NULL;
-+      debugfs_remove_recursive(dev->cache.fs_root);
-+      dev->cache.fs_root = NULL;
- }
- static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
-@@ -713,12 +776,13 @@ static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
-       if (!mlx5_debugfs_root || dev->is_rep)
-               return;
--      cache->root = debugfs_create_dir("mr_cache", mlx5_debugfs_get_dev_root(dev->mdev));
-+      dir = mlx5_debugfs_get_dev_root(dev->mdev);
-+      cache->fs_root = debugfs_create_dir("mr_cache", dir);
-       for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
--              ent = &cache->ent[i];
-+              ent = mkey_cache_ent_from_order(dev, i);
-               sprintf(ent->name, "%d", ent->order);
--              dir = debugfs_create_dir(ent->name, cache->root);
-+              dir = debugfs_create_dir(ent->name, cache->fs_root);
-               debugfs_create_file("size", 0600, dir, ent, &size_fops);
-               debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
-               debugfs_create_ulong("cur", 0400, dir, &ent->stored);
-@@ -733,6 +797,30 @@ static void delay_time_func(struct timer_list *t)
-       WRITE_ONCE(dev->fill_delay, 0);
- }
-+struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
-+                                            int order)
-+{
-+      struct mlx5_cache_ent *ent;
-+      int ret;
-+
-+      ent = kzalloc(sizeof(*ent), GFP_KERNEL);
-+      if (!ent)
-+              return ERR_PTR(-ENOMEM);
-+
-+      xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
-+      ent->order = order;
-+      ent->dev = dev;
-+
-+      INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
-+
-+      ret = mlx5_cache_ent_insert(&dev->cache, ent);
-+      if (ret) {
-+              kfree(ent);
-+              return ERR_PTR(ret);
-+      }
-+      return ent;
-+}
-+
- int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
- {
-       struct mlx5_mkey_cache *cache = &dev->cache;
-@@ -740,6 +828,8 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
-       int i;
-       mutex_init(&dev->slow_path_mutex);
-+      mutex_init(&dev->cache.rb_lock);
-+      dev->cache.rb_root = RB_ROOT;
-       cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
-       if (!cache->wq) {
-               mlx5_ib_warn(dev, "failed to create work queue\n");
-@@ -749,13 +839,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
-       mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
-       timer_setup(&dev->delay_timer, delay_time_func, 0);
-       for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
--              ent = &cache->ent[i];
--              xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
--              ent->order = i + 2;
--              ent->dev = dev;
--              ent->limit = 0;
--
--              INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
-+              ent = mlx5r_cache_create_ent(dev, i);
-               if (i > MKEY_CACHE_LAST_STD_ENTRY) {
-                       mlx5_odp_init_mkey_cache_entry(ent);
-@@ -785,14 +869,16 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
- int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
- {
--      unsigned int i;
-+      struct rb_root *root = &dev->cache.rb_root;
-+      struct mlx5_cache_ent *ent;
-+      struct rb_node *node;
-       if (!dev->cache.wq)
-               return 0;
--      for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
--              struct mlx5_cache_ent *ent = &dev->cache.ent[i];
--
-+      mutex_lock(&dev->cache.rb_lock);
-+      for (node = rb_first(root); node; node = rb_next(node)) {
-+              ent = rb_entry(node, struct mlx5_cache_ent, node);
-               xa_lock_irq(&ent->mkeys);
-               ent->disabled = true;
-               xa_unlock_irq(&ent->mkeys);
-@@ -802,8 +888,15 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
-       mlx5_mkey_cache_debugfs_cleanup(dev);
-       mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
--      for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++)
--              clean_keys(dev, i);
-+      node = rb_first(root);
-+      while (node) {
-+              ent = rb_entry(node, struct mlx5_cache_ent, node);
-+              node = rb_next(node);
-+              clean_keys(dev, ent);
-+              rb_erase(&ent->node, root);
-+              kfree(ent);
-+      }
-+      mutex_unlock(&dev->cache.rb_lock);
-       destroy_workqueue(dev->cache.wq);
-       del_timer_sync(&dev->delay_timer);
-@@ -876,19 +969,6 @@ static int mkey_cache_max_order(struct mlx5_ib_dev *dev)
-       return MLX5_MAX_UMR_SHIFT;
- }
--static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev,
--                                                      unsigned int order)
--{
--      struct mlx5_mkey_cache *cache = &dev->cache;
--
--      if (order < cache->ent[0].order)
--              return &cache->ent[0];
--      order = order - cache->ent[0].order;
--      if (order > MKEY_CACHE_LAST_STD_ENTRY)
--              return NULL;
--      return &cache->ent[order];
--}
--
- static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
-                         u64 length, int access_flags, u64 iova)
- {
-diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
-index 5f0a17382de73..7f68940ca0d1e 100644
---- a/drivers/infiniband/hw/mlx5/odp.c
-+++ b/drivers/infiniband/hw/mlx5/odp.c
-@@ -420,8 +420,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
-               return ERR_CAST(odp);
-       BUILD_BUG_ON(order > MKEY_CACHE_LAST_STD_ENTRY);
--      mr = mlx5_mr_cache_alloc(dev, &dev->cache.ent[order],
--                               imr->access_flags);
-+      mr = mlx5_mr_cache_alloc_order(dev, order, imr->access_flags);
-       if (IS_ERR(mr)) {
-               ib_umem_odp_release(odp);
-               return mr;
-@@ -495,9 +494,8 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
-       if (IS_ERR(umem_odp))
-               return ERR_CAST(umem_odp);
--      imr = mlx5_mr_cache_alloc(dev,
--                                &dev->cache.ent[MLX5_IMR_KSM_CACHE_ENTRY],
--                                access_flags);
-+      imr = mlx5_mr_cache_alloc_order(dev, MLX5_IMR_KSM_CACHE_ENTRY,
-+                                      access_flags);
-       if (IS_ERR(imr)) {
-               ib_umem_odp_release(umem_odp);
-               return imr;
--- 
-2.39.5
-
diff --git a/queue-6.1/rdma-mlx5-check-reg_create-create-for-errors.patch b/queue-6.1/rdma-mlx5-check-reg_create-create-for-errors.patch
deleted file mode 100644 (file)
index 666ce7f..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-From 8e6e49ccf1a0f2b3257394dc8610bb6d48859d3f Mon Sep 17 00:00:00 2001
-From: Dan Carpenter <error27@gmail.com>
-Date: Mon, 6 Feb 2023 17:40:35 +0300
-Subject: RDMA/mlx5: Check reg_create() create for errors
-
-From: Dan Carpenter <error27@gmail.com>
-
-commit 8e6e49ccf1a0f2b3257394dc8610bb6d48859d3f upstream.
-
-The reg_create() can fail.  Check for errors before dereferencing it.
-
-Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
-Signed-off-by: Dan Carpenter <error27@gmail.com>
-Link: https://lore.kernel.org/r/Y+ERYy4wN0LsKsm+@kili
-Reviewed-by: Devesh Sharma <devesh.s.sharma@oracle.com>
-Signed-off-by: Leon Romanovsky <leon@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/hw/mlx5/mr.c |    2 ++
- 1 file changed, 2 insertions(+)
-
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -1143,6 +1143,8 @@ static struct mlx5_ib_mr *alloc_cacheabl
-               mutex_lock(&dev->slow_path_mutex);
-               mr = reg_create(pd, umem, iova, access_flags, page_size, false);
-               mutex_unlock(&dev->slow_path_mutex);
-+              if (IS_ERR(mr))
-+                      return mr;
-               mr->mmkey.rb_key = rb_key;
-               return mr;
-       }
diff --git a/queue-6.1/rdma-mlx5-don-t-keep-umrable-page_shift-in-cache-ent.patch b/queue-6.1/rdma-mlx5-don-t-keep-umrable-page_shift-in-cache-ent.patch
deleted file mode 100644 (file)
index 2caa17c..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-From a85b91bcb6fce39a7511353461ead5a60b13bc69 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Thu, 26 Jan 2023 00:28:02 +0200
-Subject: RDMA/mlx5: Don't keep umrable 'page_shift' in cache entries
-
-From: Aharon Landau <aharonl@nvidia.com>
-
-[ Upstream commit a2a88b8e22d1b202225d0e40b02ad068afab2ccb ]
-
-mkc.log_page_size can be changed using UMR. Therefore, don't treat it as a
-cache entry property.
-
-Remove it from struct mlx5_cache_ent.
-
-All cache mkeys will be created with default PAGE_SHIFT, and updated with
-the needed page_shift using UMR when passing them to a user.
-
-Link: https://lore.kernel.org/r/20230125222807.6921-2-michaelgur@nvidia.com
-Signed-off-by: Aharon Landau <aharonl@nvidia.com>
-Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-Stable-dep-of: d97505baea64 ("RDMA/mlx5: Fix the recovery flow of the UMR QP")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 -
- drivers/infiniband/hw/mlx5/mr.c      | 3 +--
- drivers/infiniband/hw/mlx5/odp.c     | 2 --
- 3 files changed, 1 insertion(+), 5 deletions(-)
-
-diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-index 0ef347e91ffeb..10c87901da27c 100644
---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
-+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-@@ -759,7 +759,6 @@ struct mlx5_cache_ent {
-       char                    name[4];
-       u32                     order;
-       u32                     access_mode;
--      u32                     page;
-       unsigned int            ndescs;
-       u8 disabled:1;
-diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
-index b81b03aa2a629..53fadd6edb68d 100644
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -297,7 +297,7 @@ static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
-       MLX5_SET(mkc, mkc, translations_octword_size,
-                get_mkc_octo_size(ent->access_mode, ent->ndescs));
--      MLX5_SET(mkc, mkc, log_page_size, ent->page);
-+      MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
- }
- /* Asynchronously schedule new MRs to be populated in the cache. */
-@@ -765,7 +765,6 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
-               if (ent->order > mkey_cache_max_order(dev))
-                       continue;
--              ent->page = PAGE_SHIFT;
-               ent->ndescs = 1 << ent->order;
-               ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
-               if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
-diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
-index 87fbee8061003..a5c9baec8be85 100644
---- a/drivers/infiniband/hw/mlx5/odp.c
-+++ b/drivers/infiniband/hw/mlx5/odp.c
-@@ -1598,14 +1598,12 @@ void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent)
-       switch (ent->order - 2) {
-       case MLX5_IMR_MTT_CACHE_ENTRY:
--              ent->page = PAGE_SHIFT;
-               ent->ndescs = MLX5_IMR_MTT_ENTRIES;
-               ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
-               ent->limit = 0;
-               break;
-       case MLX5_IMR_KSM_CACHE_ENTRY:
--              ent->page = MLX5_KSM_PAGE_SHIFT;
-               ent->ndescs = mlx5_imr_ksm_entries;
-               ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
-               ent->limit = 0;
--- 
-2.39.5
-
diff --git a/queue-6.1/rdma-mlx5-ensure-created-mkeys-always-have-a-populated-rb_key.patch b/queue-6.1/rdma-mlx5-ensure-created-mkeys-always-have-a-populated-rb_key.patch
deleted file mode 100644 (file)
index add9d0b..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-From 2e4c02fdecf2f6f55cefe48cb82d93fa4f8e2204 Mon Sep 17 00:00:00 2001
-From: Jason Gunthorpe <jgg@nvidia.com>
-Date: Tue, 28 May 2024 15:52:54 +0300
-Subject: RDMA/mlx5: Ensure created mkeys always have a populated rb_key
-
-From: Jason Gunthorpe <jgg@nvidia.com>
-
-commit 2e4c02fdecf2f6f55cefe48cb82d93fa4f8e2204 upstream.
-
-cacheable and mmkey.rb_key together are used by mlx5_revoke_mr() to put the
-MR/mkey back into the cache. In all cases they should be set correctly.
-
-alloc_cacheable_mr() was setting cacheable but not filling rb_key,
-resulting in cache_ent_find_and_store() bucketing them all into a 0 length
-entry.
-
-implicit_get_child_mr()/mlx5_ib_alloc_implicit_mr() failed to set cacheable
-or rb_key at all, so the cache was not working at all for implicit ODP.
-
-Cc: stable@vger.kernel.org
-Fixes: 8c1185fef68c ("RDMA/mlx5: Change check for cacheable mkeys")
-Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
-Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-Link: https://lore.kernel.org/r/7778c02dfa0999a30d6746c79a23dd7140a9c729.1716900410.git.leon@kernel.org
-Signed-off-by: Leon Romanovsky <leon@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/hw/mlx5/mr.c |    3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -715,6 +715,8 @@ static struct mlx5_ib_mr *_mlx5_mr_cache
-       }
-       mr->mmkey.cache_ent = ent;
-       mr->mmkey.type = MLX5_MKEY_MR;
-+      mr->mmkey.rb_key = ent->rb_key;
-+      mr->mmkey.cacheable = true;
-       init_waitqueue_head(&mr->mmkey.wait);
-       return mr;
- }
-@@ -1165,7 +1167,6 @@ static struct mlx5_ib_mr *alloc_cacheabl
-       mr->ibmr.pd = pd;
-       mr->umem = umem;
-       mr->page_shift = order_base_2(page_size);
--      mr->mmkey.cacheable = true;
-       set_mr_fields(dev, mr, umem->length, access_flags, iova);
-       return mr;
diff --git a/queue-6.1/rdma-mlx5-fix-ah-static-rate-parsing.patch b/queue-6.1/rdma-mlx5-fix-ah-static-rate-parsing.patch
deleted file mode 100644 (file)
index 7c87470..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-From b79f406d4cc08e99e836a5e95040672efdba5313 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Mon, 10 Feb 2025 13:32:39 +0200
-Subject: RDMA/mlx5: Fix AH static rate parsing
-
-From: Patrisious Haddad <phaddad@nvidia.com>
-
-[ Upstream commit c534ffda781f44a1c6ac25ef6e0e444da38ca8af ]
-
-Previously the static rate wasn't translated according to our PRM but simply
-used the 4 lower bits.
-
-Correctly translate static rate value passed in AH creation attribute
-according to our PRM expected values.
-
-In addition, change the 800Gbps mapping to zero, which is the
-PRM-specified value.
-
-Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
-Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
-Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
-Link: https://patch.msgid.link/18ef4cc5396caf80728341eb74738cd777596f60.1739187089.git.leon@kernel.org
-Signed-off-by: Leon Romanovsky <leon@kernel.org>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/infiniband/hw/mlx5/ah.c | 3 ++-
- drivers/infiniband/hw/mlx5/qp.c | 6 +++---
- drivers/infiniband/hw/mlx5/qp.h | 1 +
- 3 files changed, 6 insertions(+), 4 deletions(-)
-
-diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
-index 505bc47fd575d..99036afb3aef0 100644
---- a/drivers/infiniband/hw/mlx5/ah.c
-+++ b/drivers/infiniband/hw/mlx5/ah.c
-@@ -67,7 +67,8 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
-               ah->av.tclass = grh->traffic_class;
-       }
--      ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);
-+      ah->av.stat_rate_sl =
-+              (mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr)) << 4);
-       if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
-               if (init_attr->xmit_slave)
-diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
-index 43c0123babd10..59dca0cd89052 100644
---- a/drivers/infiniband/hw/mlx5/qp.c
-+++ b/drivers/infiniband/hw/mlx5/qp.c
-@@ -3379,11 +3379,11 @@ static int ib_to_mlx5_rate_map(u8 rate)
-       return 0;
- }
--static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
-+int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate)
- {
-       u32 stat_rate_support;
--      if (rate == IB_RATE_PORT_CURRENT)
-+      if (rate == IB_RATE_PORT_CURRENT || rate == IB_RATE_800_GBPS)
-               return 0;
-       if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS)
-@@ -3528,7 +3528,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
-                      sizeof(grh->dgid.raw));
-       }
--      err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah));
-+      err = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah));
-       if (err < 0)
-               return err;
-       MLX5_SET(ads, path, stat_rate, err);
-diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h
-index e677fa0ca4226..4abb77d551670 100644
---- a/drivers/infiniband/hw/mlx5/qp.h
-+++ b/drivers/infiniband/hw/mlx5/qp.h
-@@ -55,4 +55,5 @@ int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn);
- int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
- int mlx5_ib_qp_event_init(void);
- void mlx5_ib_qp_event_cleanup(void);
-+int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate);
- #endif /* _MLX5_IB_QP_H */
--- 
-2.39.5
-
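
For reference, the translated PRM rate lands in the upper four bits of the stat_rate_sl byte (hence the shift by 4 above). A minimal userspace sketch of that packing; the SL-in-the-low-nibble placement is an assumption suggested by the field name, not something this patch shows:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical illustration of the stat_rate_sl layout: PRM-translated
     * static rate in the upper nibble, SL (assumed) in the lower nibble. */
    static uint8_t pack_stat_rate_sl(uint8_t prm_rate, uint8_t sl)
    {
            return (uint8_t)(((prm_rate & 0xf) << 4) | (sl & 0xf));
    }

    int main(void)
    {
            /* PRM rate 0 means "use the current port rate"; after this patch
             * the 800Gbps attribute maps to 0 as well. */
            printf("stat_rate_sl = 0x%02x\n",
                   (unsigned int)pack_stat_rate_sl(0, 3));
            return 0;
    }
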
diff --git a/queue-6.1/rdma-mlx5-fix-assigning-access-flags-to-cache-mkeys.patch b/queue-6.1/rdma-mlx5-fix-assigning-access-flags-to-cache-mkeys.patch
deleted file mode 100644 (file)
index 840c967..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-From 4f14c6c0213e1def48f0f887d35f44095416c67d Mon Sep 17 00:00:00 2001
-From: Michael Guralnik <michaelgur@nvidia.com>
-Date: Wed, 20 Sep 2023 13:01:54 +0300
-Subject: RDMA/mlx5: Fix assigning access flags to cache mkeys
-
-From: Michael Guralnik <michaelgur@nvidia.com>
-
-commit 4f14c6c0213e1def48f0f887d35f44095416c67d upstream.
-
-After the change to use a dynamic cache structure, new cache entries
-can be added and the mkey allocation can no longer assume that all
-mkeys created for the cache have access_flags equal to zero.
-
-Example of a flow that exposes the issue:
-A user registers an MR with RO on an HCA that cannot UMR RO, and the mkey
-is created outside of the cache. When the user deregisters the MR, a new
-cache entry is created to store mkeys with RO.
-
-Later, the user registers 2 MRs with RO. The first MR is reused from the
-new cache entry. When we try to get the second mkey from the cache we see
-the entry is empty, so we go to the MR cache mkey allocation flow, which
-would have allocated an mkey with no access flags, resulting in the user
-getting an MR without RO.
-
-Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
-Reviewed-by: Edward Srouji <edwards@nvidia.com>
-Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
-Link: https://lore.kernel.org/r/8a802700b82def3ace3f77cd7a9ad9d734af87e7.1695203958.git.leonro@nvidia.com
-Signed-off-by: Leon Romanovsky <leon@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/hw/mlx5/mr.c |    3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -236,7 +236,8 @@ static int get_mkc_octo_size(unsigned in
- static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
- {
--      set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
-+      set_mkc_access_pd_addr_fields(mkc, ent->rb_key.access_flags, 0,
-+                                    ent->dev->umrc.pd);
-       MLX5_SET(mkc, mkc, free, 1);
-       MLX5_SET(mkc, mkc, umr_en, 1);
-       MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3);
diff --git a/queue-6.1/rdma-mlx5-fix-counter-update-on-mr-cache-mkey-creation.patch b/queue-6.1/rdma-mlx5-fix-counter-update-on-mr-cache-mkey-creation.patch
deleted file mode 100644 (file)
index 2d8a926..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-From 6f5cd6ac9a4201e4ba6f10b76a9da8044d6e38b0 Mon Sep 17 00:00:00 2001
-From: Michael Guralnik <michaelgur@nvidia.com>
-Date: Tue, 3 Sep 2024 14:24:48 +0300
-Subject: RDMA/mlx5: Fix counter update on MR cache mkey creation
-
-From: Michael Guralnik <michaelgur@nvidia.com>
-
-commit 6f5cd6ac9a4201e4ba6f10b76a9da8044d6e38b0 upstream.
-
-After an mkey is created, update the counter for pending mkeys before
-rescheduling the work that is filling the cache.
-
-Rescheduling the work with a full MR cache entry and a wrong 'pending'
-counter will cause us to miss disabling the fill_to_high_water flag.
-Thus leaving the cache full but with an indication that it still
-needs to be filled up to its full size (2 * limit).
-Next time an mkey is taken from the cache, we'll unnecessarily
-continue the process of filling the cache to its full size.
-
-Fixes: 57e7071683ef ("RDMA/mlx5: Implement mkeys management via LIFO queue")
-Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
-Link: https://patch.msgid.link/0f44f462ba22e45f72cb3d0ec6a748634086b8d0.1725362530.git.leon@kernel.org
-Signed-off-by: Leon Romanovsky <leon@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/hw/mlx5/mr.c |    2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -208,9 +208,9 @@ static void create_mkey_callback(int sta
-       spin_lock_irqsave(&ent->mkeys_queue.lock, flags);
-       push_mkey_locked(ent, mkey_out->mkey);
-+      ent->pending--;
-       /* If we are doing fill_to_high_water then keep going. */
-       queue_adjust_cache_locked(ent);
--      ent->pending--;
-       spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags);
-       kfree(mkey_out);
- }
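
Condensed from the hunk above, the resulting order in create_mkey_callback(), with the reasoning from the commit message spelled out as comments (a fragment, not standalone code):

    spin_lock_irqsave(&ent->mkeys_queue.lock, flags);
    push_mkey_locked(ent, mkey_out->mkey);
    ent->pending--;                 /* account for the completed mkey first... */
    /* ...so the refill decision sees an accurate in-flight count and can
     * clear fill_to_high_water once the entry is actually full */
    queue_adjust_cache_locked(ent);
    spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags);
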
diff --git a/queue-6.1/rdma-mlx5-fix-mkey-cache-possible-deadlock-on-cleanup.patch b/queue-6.1/rdma-mlx5-fix-mkey-cache-possible-deadlock-on-cleanup.patch
deleted file mode 100644 (file)
index 731d75d..0000000
+++ /dev/null
@@ -1,151 +0,0 @@
-From 374012b0045780b7ad498be62e85153009bb7fe9 Mon Sep 17 00:00:00 2001
-From: Shay Drory <shayd@nvidia.com>
-Date: Tue, 12 Sep 2023 13:07:45 +0300
-Subject: RDMA/mlx5: Fix mkey cache possible deadlock on cleanup
-
-From: Shay Drory <shayd@nvidia.com>
-
-commit 374012b0045780b7ad498be62e85153009bb7fe9 upstream.
-
-Fix the deadlock by refactoring the MR cache cleanup flow to flush the
-workqueue without holding the rb_lock.
-This adds a race between cache cleanup and creation of new entries, which
-we solve by denying creation of new entries after cache cleanup has started.
-
-Lockdep:
-WARNING: possible circular locking dependency detected
- [ 2785.326074 ] 6.2.0-rc6_for_upstream_debug_2023_01_31_14_02 #1 Not tainted
- [ 2785.339778 ] ------------------------------------------------------
- [ 2785.340848 ] devlink/53872 is trying to acquire lock:
- [ 2785.341701 ] ffff888124f8c0c8 ((work_completion)(&(&ent->dwork)->work)){+.+.}-{0:0}, at: __flush_work+0xc8/0x900
- [ 2785.343403 ]
- [ 2785.343403 ] but task is already holding lock:
- [ 2785.344464 ] ffff88817e8f1260 (&dev->cache.rb_lock){+.+.}-{3:3}, at: mlx5_mkey_cache_cleanup+0x77/0x250 [mlx5_ib]
- [ 2785.346273 ]
- [ 2785.346273 ] which lock already depends on the new lock.
- [ 2785.346273 ]
- [ 2785.347720 ]
- [ 2785.347720 ] the existing dependency chain (in reverse order) is:
- [ 2785.349003 ]
- [ 2785.349003 ] -> #1 (&dev->cache.rb_lock){+.+.}-{3:3}:
- [ 2785.350160 ]        __mutex_lock+0x14c/0x15c0
- [ 2785.350962 ]        delayed_cache_work_func+0x2d1/0x610 [mlx5_ib]
- [ 2785.352044 ]        process_one_work+0x7c2/0x1310
- [ 2785.352879 ]        worker_thread+0x59d/0xec0
- [ 2785.353636 ]        kthread+0x28f/0x330
- [ 2785.354370 ]        ret_from_fork+0x1f/0x30
- [ 2785.355135 ]
- [ 2785.355135 ] -> #0 ((work_completion)(&(&ent->dwork)->work)){+.+.}-{0:0}:
- [ 2785.356515 ]        __lock_acquire+0x2d8a/0x5fe0
- [ 2785.357349 ]        lock_acquire+0x1c1/0x540
- [ 2785.358121 ]        __flush_work+0xe8/0x900
- [ 2785.358852 ]        __cancel_work_timer+0x2c7/0x3f0
- [ 2785.359711 ]        mlx5_mkey_cache_cleanup+0xfb/0x250 [mlx5_ib]
- [ 2785.360781 ]        mlx5_ib_stage_pre_ib_reg_umr_cleanup+0x16/0x30 [mlx5_ib]
- [ 2785.361969 ]        __mlx5_ib_remove+0x68/0x120 [mlx5_ib]
- [ 2785.362960 ]        mlx5r_remove+0x63/0x80 [mlx5_ib]
- [ 2785.363870 ]        auxiliary_bus_remove+0x52/0x70
- [ 2785.364715 ]        device_release_driver_internal+0x3c1/0x600
- [ 2785.365695 ]        bus_remove_device+0x2a5/0x560
- [ 2785.366525 ]        device_del+0x492/0xb80
- [ 2785.367276 ]        mlx5_detach_device+0x1a9/0x360 [mlx5_core]
- [ 2785.368615 ]        mlx5_unload_one_devl_locked+0x5a/0x110 [mlx5_core]
- [ 2785.369934 ]        mlx5_devlink_reload_down+0x292/0x580 [mlx5_core]
- [ 2785.371292 ]        devlink_reload+0x439/0x590
- [ 2785.372075 ]        devlink_nl_cmd_reload+0xaef/0xff0
- [ 2785.372973 ]        genl_family_rcv_msg_doit.isra.0+0x1bd/0x290
- [ 2785.374011 ]        genl_rcv_msg+0x3ca/0x6c0
- [ 2785.374798 ]        netlink_rcv_skb+0x12c/0x360
- [ 2785.375612 ]        genl_rcv+0x24/0x40
- [ 2785.376295 ]        netlink_unicast+0x438/0x710
- [ 2785.377121 ]        netlink_sendmsg+0x7a1/0xca0
- [ 2785.377926 ]        sock_sendmsg+0xc5/0x190
- [ 2785.378668 ]        __sys_sendto+0x1bc/0x290
- [ 2785.379440 ]        __x64_sys_sendto+0xdc/0x1b0
- [ 2785.380255 ]        do_syscall_64+0x3d/0x90
- [ 2785.381031 ]        entry_SYSCALL_64_after_hwframe+0x46/0xb0
- [ 2785.381967 ]
- [ 2785.381967 ] other info that might help us debug this:
- [ 2785.381967 ]
- [ 2785.383448 ]  Possible unsafe locking scenario:
- [ 2785.383448 ]
- [ 2785.384544 ]        CPU0                    CPU1
- [ 2785.385383 ]        ----                    ----
- [ 2785.386193 ]   lock(&dev->cache.rb_lock);
- [ 2785.386940 ]                               lock((work_completion)(&(&ent->dwork)->work));
- [ 2785.388327 ]                               lock(&dev->cache.rb_lock);
- [ 2785.389425 ]   lock((work_completion)(&(&ent->dwork)->work));
- [ 2785.390414 ]
- [ 2785.390414 ]  *** DEADLOCK ***
- [ 2785.390414 ]
- [ 2785.391579 ] 6 locks held by devlink/53872:
- [ 2785.392341 ]  #0: ffffffff84c17a50 (cb_lock){++++}-{3:3}, at: genl_rcv+0x15/0x40
- [ 2785.393630 ]  #1: ffff888142280218 (&devlink->lock_key){+.+.}-{3:3}, at: devlink_get_from_attrs_lock+0x12d/0x2d0
- [ 2785.395324 ]  #2: ffff8881422d3c38 (&dev->lock_key){+.+.}-{3:3}, at: mlx5_unload_one_devl_locked+0x4a/0x110 [mlx5_core]
- [ 2785.397322 ]  #3: ffffffffa0e59068 (mlx5_intf_mutex){+.+.}-{3:3}, at: mlx5_detach_device+0x60/0x360 [mlx5_core]
- [ 2785.399231 ]  #4: ffff88810e3cb0e8 (&dev->mutex){....}-{3:3}, at: device_release_driver_internal+0x8d/0x600
- [ 2785.400864 ]  #5: ffff88817e8f1260 (&dev->cache.rb_lock){+.+.}-{3:3}, at: mlx5_mkey_cache_cleanup+0x77/0x250 [mlx5_ib]
-
-Fixes: b95845178328 ("RDMA/mlx5: Change the cache structure to an RB-tree")
-Signed-off-by: Shay Drory <shayd@nvidia.com>
-Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
-Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/hw/mlx5/mlx5_ib.h |    1 +
- drivers/infiniband/hw/mlx5/mr.c      |   16 ++++++++++++++--
- 2 files changed, 15 insertions(+), 2 deletions(-)
-
---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
-+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-@@ -821,6 +821,7 @@ struct mlx5_mkey_cache {
-       struct dentry           *fs_root;
-       unsigned long           last_add;
-       struct delayed_work     remove_ent_dwork;
-+      u8                      disable: 1;
- };
- struct mlx5_ib_port_resources {
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -994,19 +994,27 @@ int mlx5_mkey_cache_cleanup(struct mlx5_
-       if (!dev->cache.wq)
-               return 0;
--      cancel_delayed_work_sync(&dev->cache.remove_ent_dwork);
-       mutex_lock(&dev->cache.rb_lock);
-+      dev->cache.disable = true;
-       for (node = rb_first(root); node; node = rb_next(node)) {
-               ent = rb_entry(node, struct mlx5_cache_ent, node);
-               spin_lock_irq(&ent->mkeys_queue.lock);
-               ent->disabled = true;
-               spin_unlock_irq(&ent->mkeys_queue.lock);
--              cancel_delayed_work_sync(&ent->dwork);
-       }
-+      mutex_unlock(&dev->cache.rb_lock);
-+
-+      /*
-+       * After all entries are disabled and will not reschedule on WQ,
-+       * flush it and all async commands.
-+       */
-+      flush_workqueue(dev->cache.wq);
-       mlx5_mkey_cache_debugfs_cleanup(dev);
-       mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
-+      /* At this point all entries are disabled and have no concurrent work. */
-+      mutex_lock(&dev->cache.rb_lock);
-       node = rb_first(root);
-       while (node) {
-               ent = rb_entry(node, struct mlx5_cache_ent, node);
-@@ -1789,6 +1797,10 @@ static int cache_ent_find_and_store(stru
-       }
-       mutex_lock(&cache->rb_lock);
-+      if (cache->disable) {
-+              mutex_unlock(&cache->rb_lock);
-+              return 0;
-+      }
-       ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key);
-       if (ent) {
-               if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) {
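
The locking rule this patch enforces - never wait for the cache workers while holding rb_lock, because the workers themselves take rb_lock - modelled as a standalone userspace sketch (pthreads; all names are illustrative, not the driver's):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t rb_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool cache_disabled;

    /* Stands in for delayed_cache_work_func(): the worker needs rb_lock. */
    static void *cache_worker(void *arg)
    {
            (void)arg;
            pthread_mutex_lock(&rb_lock);
            if (!cache_disabled) {
                    /* refill or trim an entry */
            }
            pthread_mutex_unlock(&rb_lock);
            return NULL;
    }

    static void cache_cleanup(pthread_t worker)
    {
            pthread_mutex_lock(&rb_lock);
            cache_disabled = true;          /* deny creation of new entries */
            pthread_mutex_unlock(&rb_lock); /* drop the lock BEFORE waiting */

            /* Stands in for flush_workqueue(): waiting here while still
             * holding rb_lock would be the reported deadlock. */
            pthread_join(worker, NULL);

            pthread_mutex_lock(&rb_lock);   /* entries are idle; tear them down */
            pthread_mutex_unlock(&rb_lock);
    }

    int main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, cache_worker, NULL);
            cache_cleanup(t);
            puts("cleanup finished without deadlock");
            return 0;
    }
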
diff --git a/queue-6.1/rdma-mlx5-fix-mkey-cache-wq-flush.patch b/queue-6.1/rdma-mlx5-fix-mkey-cache-wq-flush.patch
deleted file mode 100644 (file)
index c28bcf4..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-From a53e215f90079f617360439b1b6284820731e34c Mon Sep 17 00:00:00 2001
-From: Moshe Shemesh <moshe@nvidia.com>
-Date: Wed, 25 Oct 2023 20:49:59 +0300
-Subject: RDMA/mlx5: Fix mkey cache WQ flush
-
-From: Moshe Shemesh <moshe@nvidia.com>
-
-commit a53e215f90079f617360439b1b6284820731e34c upstream.
-
-The cited patch tries to ensure there is no pending work on the mkey cache
-workqueue by disabling the queueing of new work and calling flush_workqueue().
-But this workqueue also has delayed work items which might still be waiting
-out their delay time before being queued.
-
-Add cancel_delayed_work() for the delayed work items that are waiting to be
-queued, and then flush_workqueue() will flush all work items which are
-already queued and running.
-
-Fixes: 374012b00457 ("RDMA/mlx5: Fix mkey cache possible deadlock on cleanup")
-Link: https://lore.kernel.org/r/b8722f14e7ed81452f791764a26d2ed4cfa11478.1698256179.git.leon@kernel.org
-Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
-Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
-Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/hw/mlx5/mr.c |    2 ++
- 1 file changed, 2 insertions(+)
-
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -1007,11 +1007,13 @@ int mlx5_mkey_cache_cleanup(struct mlx5_
-               return 0;
-       mutex_lock(&dev->cache.rb_lock);
-+      cancel_delayed_work(&dev->cache.remove_ent_dwork);
-       for (node = rb_first(root); node; node = rb_next(node)) {
-               ent = rb_entry(node, struct mlx5_cache_ent, node);
-               spin_lock_irq(&ent->mkeys_queue.lock);
-               ent->disabled = true;
-               spin_unlock_irq(&ent->mkeys_queue.lock);
-+              cancel_delayed_work(&ent->dwork);
-       }
-       mutex_unlock(&dev->cache.rb_lock);
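
With this fix the cleanup ordering becomes: cancel the delayed work that is still waiting out its delay, then flush whatever is already queued. Condensed from the hunks above (a fragment, not standalone code):

    mutex_lock(&dev->cache.rb_lock);
    cancel_delayed_work(&dev->cache.remove_ent_dwork);
    for (node = rb_first(root); node; node = rb_next(node)) {
            ent = rb_entry(node, struct mlx5_cache_ent, node);
            spin_lock_irq(&ent->mkeys_queue.lock);
            ent->disabled = true;             /* the work will not re-arm itself */
            spin_unlock_irq(&ent->mkeys_queue.lock);
            cancel_delayed_work(&ent->dwork); /* drop work still pending its delay */
    }
    mutex_unlock(&dev->cache.rb_lock);

    flush_workqueue(dev->cache.wq);           /* only already-queued work remains */
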
diff --git a/queue-6.1/rdma-mlx5-fix-mr-cache-debugfs-error-in-ib-representors-mode.patch b/queue-6.1/rdma-mlx5-fix-mr-cache-debugfs-error-in-ib-representors-mode.patch
deleted file mode 100644 (file)
index 31fbb71..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-From 828cf5936bea2438c21a3a6c303b34a2a1f6c3c2 Mon Sep 17 00:00:00 2001
-From: Leon Romanovsky <leonro@nvidia.com>
-Date: Thu, 2 Feb 2023 11:03:06 +0200
-Subject: RDMA/mlx5: Fix MR cache debugfs error in IB representors mode
-
-From: Leon Romanovsky <leonro@nvidia.com>
-
-commit 828cf5936bea2438c21a3a6c303b34a2a1f6c3c2 upstream.
-
-Block MR cache debugfs creation for the IB representor flow, as the MR cache
-shouldn't be used at all in that mode. As part of this change, add the missing
-debugfs cleanup in the error path too.
-
-This change fixes the following debugfs errors:
-
- bond0: (slave enp8s0f1): Enslaving as a backup interface with an up link
- mlx5_core 0000:08:00.0: lag map: port 1:1 port 2:1
- mlx5_core 0000:08:00.0: shared_fdb:1 mode:queue_affinity
- mlx5_core 0000:08:00.0: Operation mode is single FDB
- debugfs: Directory '2' with parent '/' already present!
-...
- debugfs: Directory '22' with parent '/' already present!
-
-Fixes: 73d09b2fe833 ("RDMA/mlx5: Introduce mlx5r_cache_rb_key")
-Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
-Link: https://lore.kernel.org/r/482a78c54acbcfa1742a0e06a452546428900ffa.1675328463.git.leon@kernel.org
-Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/hw/mlx5/mr.c |    4 ++++
- 1 file changed, 4 insertions(+)
-
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -789,6 +789,9 @@ static void mlx5_mkey_cache_debugfs_add_
-       int order = order_base_2(ent->rb_key.ndescs);
-       struct dentry *dir;
-+      if (!mlx5_debugfs_root || dev->is_rep)
-+              return;
-+
-       if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM)
-               order = MLX5_IMR_KSM_CACHE_ENTRY + 2;
-@@ -977,6 +980,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_
- err:
-       mutex_unlock(&cache->rb_lock);
-+      mlx5_mkey_cache_debugfs_cleanup(dev);
-       mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
-       return ret;
- }
diff --git a/queue-6.1/rdma-mlx5-fix-mr-cache-temp-entries-cleanup.patch b/queue-6.1/rdma-mlx5-fix-mr-cache-temp-entries-cleanup.patch
deleted file mode 100644 (file)
index 223ad4d..0000000
+++ /dev/null
@@ -1,214 +0,0 @@
-From 7ebb00cea49db641b458edef0ede389f7004821d Mon Sep 17 00:00:00 2001
-From: Michael Guralnik <michaelgur@nvidia.com>
-Date: Tue, 3 Sep 2024 14:24:50 +0300
-Subject: RDMA/mlx5: Fix MR cache temp entries cleanup
-
-From: Michael Guralnik <michaelgur@nvidia.com>
-
-commit 7ebb00cea49db641b458edef0ede389f7004821d upstream.
-
-Fix the cleanup of the temp cache entries that are dynamically created
-in the MR cache.
-
-The cleanup of the temp cache entries is currently scheduled only when a
-new entry is created. Since in the cleanup of the entries only the mkeys
-are destroyed and the cache entry stays in the cache, subsequent
-registrations might reuse the entry and it will eventually be filled with
-new mkeys without cleanup ever getting scheduled again.
-
-On workloads that register and deregister MRs with a wide range of
-properties, we see that the cache ends up holding many cache entries, each
-holding the max number of mkeys that were ever used through it.
-
-Additionally, as the cleanup work is scheduled to run over the whole
-cache, any mkey that is returned to the cache after the cleanup was
-scheduled will be held for less than the intended 30-second timeout.
-
-Solve both issues by dropping the existing remove_ent_work and reusing
-the existing per-entry work to also handle the temp entries cleanup.
-
-Schedule the work to run with a 30-second delay every time we push an
-mkey to a clean temp entry.
-This ensures the cleanup runs on each entry only 30 seconds after the
-first mkey was pushed to an empty entry.
-
-As we already distinguish between persistent and temp entries when
-scheduling cache_work_func, it is not scheduled from any other flow for
-the temp entries.
-
-Another benefit of moving to a per-entry cleanup is that we are no longer
-required to hold the rb_tree mutex, thus enabling other flows to run
-concurrently.
-
-Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
-Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
-Link: https://patch.msgid.link/e4fa4bb03bebf20dceae320f26816cd2dde23a26.1725362530.git.leon@kernel.org
-Signed-off-by: Leon Romanovsky <leon@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/hw/mlx5/mlx5_ib.h |    2 
- drivers/infiniband/hw/mlx5/mr.c      |   85 +++++++++++++----------------------
- 2 files changed, 34 insertions(+), 53 deletions(-)
-
---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
-+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-@@ -790,6 +790,7 @@ struct mlx5_cache_ent {
-       u8 is_tmp:1;
-       u8 disabled:1;
-       u8 fill_to_high_water:1;
-+      u8 tmp_cleanup_scheduled:1;
-       /*
-        * - limit is the low water mark for stored mkeys, 2* limit is the
-@@ -821,7 +822,6 @@ struct mlx5_mkey_cache {
-       struct mutex            rb_lock;
-       struct dentry           *fs_root;
-       unsigned long           last_add;
--      struct delayed_work     remove_ent_dwork;
- };
- struct mlx5_ib_port_resources {
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -525,6 +525,23 @@ static void queue_adjust_cache_locked(st
-       }
- }
-+static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
-+{
-+      u32 mkey;
-+
-+      cancel_delayed_work(&ent->dwork);
-+      spin_lock_irq(&ent->mkeys_queue.lock);
-+      while (ent->mkeys_queue.ci) {
-+              mkey = pop_mkey_locked(ent);
-+              spin_unlock_irq(&ent->mkeys_queue.lock);
-+              mlx5_core_destroy_mkey(dev->mdev, mkey);
-+              spin_lock_irq(&ent->mkeys_queue.lock);
-+      }
-+      ent->tmp_cleanup_scheduled = false;
-+      spin_unlock_irq(&ent->mkeys_queue.lock);
-+}
-+
-+
- static void __cache_work_func(struct mlx5_cache_ent *ent)
- {
-       struct mlx5_ib_dev *dev = ent->dev;
-@@ -596,7 +613,11 @@ static void delayed_cache_work_func(stru
-       struct mlx5_cache_ent *ent;
-       ent = container_of(work, struct mlx5_cache_ent, dwork.work);
--      __cache_work_func(ent);
-+      /* temp entries are never filled, only cleaned */
-+      if (ent->is_tmp)
-+              clean_keys(ent->dev, ent);
-+      else
-+              __cache_work_func(ent);
- }
- static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1,
-@@ -771,21 +792,6 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(s
-       return _mlx5_mr_cache_alloc(dev, ent, access_flags);
- }
--static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
--{
--      u32 mkey;
--
--      cancel_delayed_work(&ent->dwork);
--      spin_lock_irq(&ent->mkeys_queue.lock);
--      while (ent->mkeys_queue.ci) {
--              mkey = pop_mkey_locked(ent);
--              spin_unlock_irq(&ent->mkeys_queue.lock);
--              mlx5_core_destroy_mkey(dev->mdev, mkey);
--              spin_lock_irq(&ent->mkeys_queue.lock);
--      }
--      spin_unlock_irq(&ent->mkeys_queue.lock);
--}
--
- static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
- {
-       if (!mlx5_debugfs_root || dev->is_rep)
-@@ -898,10 +904,6 @@ mlx5r_cache_create_ent_locked(struct mlx
-                       ent->limit = 0;
-               mlx5_mkey_cache_debugfs_add_ent(dev, ent);
--      } else {
--              mod_delayed_work(ent->dev->cache.wq,
--                               &ent->dev->cache.remove_ent_dwork,
--                               msecs_to_jiffies(30 * 1000));
-       }
-       return ent;
-@@ -912,35 +914,6 @@ mkeys_err:
-       return ERR_PTR(ret);
- }
--static void remove_ent_work_func(struct work_struct *work)
--{
--      struct mlx5_mkey_cache *cache;
--      struct mlx5_cache_ent *ent;
--      struct rb_node *cur;
--
--      cache = container_of(work, struct mlx5_mkey_cache,
--                           remove_ent_dwork.work);
--      mutex_lock(&cache->rb_lock);
--      cur = rb_last(&cache->rb_root);
--      while (cur) {
--              ent = rb_entry(cur, struct mlx5_cache_ent, node);
--              cur = rb_prev(cur);
--              mutex_unlock(&cache->rb_lock);
--
--              spin_lock_irq(&ent->mkeys_queue.lock);
--              if (!ent->is_tmp) {
--                      spin_unlock_irq(&ent->mkeys_queue.lock);
--                      mutex_lock(&cache->rb_lock);
--                      continue;
--              }
--              spin_unlock_irq(&ent->mkeys_queue.lock);
--
--              clean_keys(ent->dev, ent);
--              mutex_lock(&cache->rb_lock);
--      }
--      mutex_unlock(&cache->rb_lock);
--}
--
- int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
- {
-       struct mlx5_mkey_cache *cache = &dev->cache;
-@@ -956,7 +929,6 @@ int mlx5_mkey_cache_init(struct mlx5_ib_
-       mutex_init(&dev->slow_path_mutex);
-       mutex_init(&dev->cache.rb_lock);
-       dev->cache.rb_root = RB_ROOT;
--      INIT_DELAYED_WORK(&dev->cache.remove_ent_dwork, remove_ent_work_func);
-       cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
-       if (!cache->wq) {
-               mlx5_ib_warn(dev, "failed to create work queue\n");
-@@ -1007,7 +979,6 @@ int mlx5_mkey_cache_cleanup(struct mlx5_
-               return 0;
-       mutex_lock(&dev->cache.rb_lock);
--      cancel_delayed_work(&dev->cache.remove_ent_dwork);
-       for (node = rb_first(root); node; node = rb_next(node)) {
-               ent = rb_entry(node, struct mlx5_cache_ent, node);
-               spin_lock_irq(&ent->mkeys_queue.lock);
-@@ -1844,8 +1815,18 @@ static int mlx5_revoke_mr(struct mlx5_ib
-       struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
-       struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
--      if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr))
-+      if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) {
-+              ent = mr->mmkey.cache_ent;
-+              /* upon storing to a clean temp entry - schedule its cleanup */
-+              spin_lock_irq(&ent->mkeys_queue.lock);
-+              if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
-+                      mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
-+                                       msecs_to_jiffies(30 * 1000));
-+                      ent->tmp_cleanup_scheduled = true;
-+              }
-+              spin_unlock_irq(&ent->mkeys_queue.lock);
-               return 0;
-+      }
-       if (ent) {
-               spin_lock_irq(&ent->mkeys_queue.lock);
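
The schedule-once pattern on the dereg path, condensed from the mlx5_revoke_mr() hunk above with the intent added as comments (a fragment, not standalone code):

    if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) &&
        !cache_ent_find_and_store(dev, mr)) {
            ent = mr->mmkey.cache_ent;
            spin_lock_irq(&ent->mkeys_queue.lock);
            if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
                    /* first mkey stored into a clean temp entry: arm a single
                     * 30 second cleanup for this entry only */
                    mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
                                     msecs_to_jiffies(30 * 1000));
                    ent->tmp_cleanup_scheduled = true;
            }
            spin_unlock_irq(&ent->mkeys_queue.lock);
            return 0;   /* clean_keys() clears tmp_cleanup_scheduled later */
    }
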
diff --git a/queue-6.1/rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch b/queue-6.1/rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch
deleted file mode 100644 (file)
index 401d194..0000000
+++ /dev/null
@@ -1,209 +0,0 @@
-From 15ed43c7d41f9929ea55919272003c7ba5aec402 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Sun, 19 Jan 2025 14:36:13 +0200
-Subject: RDMA/mlx5: Fix the recovery flow of the UMR QP
-
-From: Yishai Hadas <yishaih@nvidia.com>
-
-[ Upstream commit d97505baea64d93538b16baf14ce7b8c1fbad746 ]
-
-This patch addresses an issue in the recovery flow of the UMR QP,
-ensuring tasks do not get stuck, as highlighted by the call trace [1].
-
-During recovery, before transitioning the QP to the RESET state, the
-software must wait for all outstanding WRs to complete.
-
-Failing to do so can cause the firmware to skip sending some flushed
-CQEs with errors and simply discard them upon the RESET, as per the IB
-specification.
-
-This race condition can result in lost CQEs and tasks becoming stuck.
-
-To resolve this, the patch sends a final WR which serves only as a
-barrier before moving the QP state to RESET.
-
-Once a CQE is received for that final WR, it guarantees that no
-outstanding WRs remain, making it safe to transition the QP to RESET and
-subsequently back to RTS, restoring proper functionality.
-
-Note:
-For the barrier WR, we simply reuse the failed and ready WR.
-Since the QP is in an error state, it will only receive
-IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier we don't
-care about its status.
-
-[1]
-INFO: task rdma_resource_l:1922 blocked for more than 120 seconds.
-Tainted: G        W          6.12.0-rc7+ #1626
-"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
-task:rdma_resource_l state:D stack:0  pid:1922 tgid:1922  ppid:1369
-     flags:0x00004004
-Call Trace:
-<TASK>
-__schedule+0x420/0xd30
-schedule+0x47/0x130
-schedule_timeout+0x280/0x300
-? mark_held_locks+0x48/0x80
-? lockdep_hardirqs_on_prepare+0xe5/0x1a0
-wait_for_completion+0x75/0x130
-mlx5r_umr_post_send_wait+0x3c2/0x5b0 [mlx5_ib]
-? __pfx_mlx5r_umr_done+0x10/0x10 [mlx5_ib]
-mlx5r_umr_revoke_mr+0x93/0xc0 [mlx5_ib]
-__mlx5_ib_dereg_mr+0x299/0x520 [mlx5_ib]
-? _raw_spin_unlock_irq+0x24/0x40
-? wait_for_completion+0xfe/0x130
-? rdma_restrack_put+0x63/0xe0 [ib_core]
-ib_dereg_mr_user+0x5f/0x120 [ib_core]
-? lock_release+0xc6/0x280
-destroy_hw_idr_uobject+0x1d/0x60 [ib_uverbs]
-uverbs_destroy_uobject+0x58/0x1d0 [ib_uverbs]
-uobj_destroy+0x3f/0x70 [ib_uverbs]
-ib_uverbs_cmd_verbs+0x3e4/0xbb0 [ib_uverbs]
-? __pfx_uverbs_destroy_def_handler+0x10/0x10 [ib_uverbs]
-? __lock_acquire+0x64e/0x2080
-? mark_held_locks+0x48/0x80
-? find_held_lock+0x2d/0xa0
-? lock_acquire+0xc1/0x2f0
-? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs]
-? __fget_files+0xc3/0x1b0
-ib_uverbs_ioctl+0xe7/0x170 [ib_uverbs]
-? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs]
-__x64_sys_ioctl+0x1b0/0xa70
-do_syscall_64+0x6b/0x140
-entry_SYSCALL_64_after_hwframe+0x76/0x7e
-RIP: 0033:0x7f99c918b17b
-RSP: 002b:00007ffc766d0468 EFLAGS: 00000246 ORIG_RAX:
-     0000000000000010
-RAX: ffffffffffffffda RBX: 00007ffc766d0578 RCX:
-     00007f99c918b17b
-RDX: 00007ffc766d0560 RSI: 00000000c0181b01 RDI:
-     0000000000000003
-RBP: 00007ffc766d0540 R08: 00007f99c8f99010 R09:
-     000000000000bd7e
-R10: 00007f99c94c1c70 R11: 0000000000000246 R12:
-     00007ffc766d0530
-R13: 000000000000001c R14: 0000000040246a80 R15:
-     0000000000000000
-</TASK>
-
-Fixes: 158e71bb69e3 ("RDMA/mlx5: Add a umr recovery flow")
-Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
-Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
-Link: https://patch.msgid.link/27b51b92ec42dfb09d8096fcbd51878f397ce6ec.1737290141.git.leon@kernel.org
-Signed-off-by: Leon Romanovsky <leon@kernel.org>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/infiniband/hw/mlx5/umr.c | 83 +++++++++++++++++++++-----------
- 1 file changed, 56 insertions(+), 27 deletions(-)
-
-diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
-index fa000182d0b41..1a39e86178ece 100644
---- a/drivers/infiniband/hw/mlx5/umr.c
-+++ b/drivers/infiniband/hw/mlx5/umr.c
-@@ -199,30 +199,6 @@ void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
-       ib_dealloc_pd(dev->umrc.pd);
- }
--static int mlx5r_umr_recover(struct mlx5_ib_dev *dev)
--{
--      struct umr_common *umrc = &dev->umrc;
--      struct ib_qp_attr attr;
--      int err;
--
--      attr.qp_state = IB_QPS_RESET;
--      err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
--      if (err) {
--              mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
--              goto err;
--      }
--
--      err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
--      if (err)
--              goto err;
--
--      umrc->state = MLX5_UMR_STATE_ACTIVE;
--      return 0;
--
--err:
--      umrc->state = MLX5_UMR_STATE_ERR;
--      return err;
--}
- static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
-                              struct mlx5r_umr_wqe *wqe, bool with_data)
-@@ -270,6 +246,61 @@ static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
-       return err;
- }
-+static int mlx5r_umr_recover(struct mlx5_ib_dev *dev, u32 mkey,
-+                           struct mlx5r_umr_context *umr_context,
-+                           struct mlx5r_umr_wqe *wqe, bool with_data)
-+{
-+      struct umr_common *umrc = &dev->umrc;
-+      struct ib_qp_attr attr;
-+      int err;
-+
-+      mutex_lock(&umrc->lock);
-+      /* Preventing any further WRs to be sent now */
-+      if (umrc->state != MLX5_UMR_STATE_RECOVER) {
-+              mlx5_ib_warn(dev, "UMR recovery encountered an unexpected state=%d\n",
-+                           umrc->state);
-+              umrc->state = MLX5_UMR_STATE_RECOVER;
-+      }
-+      mutex_unlock(&umrc->lock);
-+
-+      /* Sending a final/barrier WR (the failed one) and wait for its completion.
-+       * This will ensure that all the previous WRs got a completion before
-+       * we set the QP state to RESET.
-+       */
-+      err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe, wqe,
-+                                with_data);
-+      if (err) {
-+              mlx5_ib_warn(dev, "UMR recovery post send failed, err %d\n", err);
-+              goto err;
-+      }
-+
-+      /* Since the QP is in an error state, it will only receive
-+       * IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier
-+       * we don't care about its status.
-+       */
-+      wait_for_completion(&umr_context->done);
-+
-+      attr.qp_state = IB_QPS_RESET;
-+      err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
-+      if (err) {
-+              mlx5_ib_warn(dev, "Couldn't modify UMR QP to RESET, err=%d\n", err);
-+              goto err;
-+      }
-+
-+      err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
-+      if (err) {
-+              mlx5_ib_warn(dev, "Couldn't modify UMR QP to RTS, err=%d\n", err);
-+              goto err;
-+      }
-+
-+      umrc->state = MLX5_UMR_STATE_ACTIVE;
-+      return 0;
-+
-+err:
-+      umrc->state = MLX5_UMR_STATE_ERR;
-+      return err;
-+}
-+
- static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
- {
-       struct mlx5_ib_umr_context *context =
-@@ -334,9 +365,7 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
-               mlx5_ib_warn(dev,
-                       "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n",
-                       umr_context.status, mkey);
--              mutex_lock(&umrc->lock);
--              err = mlx5r_umr_recover(dev);
--              mutex_unlock(&umrc->lock);
-+              err = mlx5r_umr_recover(dev, mkey, &umr_context, wqe, with_data);
-               if (err)
-                       mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
-                                    err);
--- 
-2.39.5
-
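
The recovery sequence introduced above, reduced to its essential steps (condensed from mlx5r_umr_recover(); error handling elided, a fragment rather than standalone code):

    umrc->state = MLX5_UMR_STATE_RECOVER;   /* 1. block further WRs (under umrc->lock) */

    /* 2. repost the failed WR purely as a barrier and wait for its CQE;
     *    once it completes, no earlier WR can still be outstanding */
    mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe, wqe, with_data);
    wait_for_completion(&umr_context->done);

    attr.qp_state = IB_QPS_RESET;           /* 3. only now is RESET safe */
    ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);

    mlx5r_umr_qp_rst2rts(dev, umrc->qp);    /* 4. back to RTS and resume */
    umrc->state = MLX5_UMR_STATE_ACTIVE;
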
diff --git a/queue-6.1/rdma-mlx5-follow-rb_key.ats-when-creating-new-mkeys.patch b/queue-6.1/rdma-mlx5-follow-rb_key.ats-when-creating-new-mkeys.patch
deleted file mode 100644 (file)
index c513393..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-From f637040c3339a2ed8c12d65ad03f9552386e2fe7 Mon Sep 17 00:00:00 2001
-From: Jason Gunthorpe <jgg@nvidia.com>
-Date: Tue, 28 May 2024 15:52:53 +0300
-Subject: RDMA/mlx5: Follow rb_key.ats when creating new mkeys
-
-From: Jason Gunthorpe <jgg@nvidia.com>
-
-commit f637040c3339a2ed8c12d65ad03f9552386e2fe7 upstream.
-
-When a cache ent already exists but doesn't have any mkeys in it, the cache
-will automatically create a new one based on the specification in the
-ent->rb_key.
-
-ent->ats was missed when creating the new key and so ma_translation_mode
-was not being set even though the ent requires it.
-
-Cc: stable@vger.kernel.org
-Fixes: 73d09b2fe833 ("RDMA/mlx5: Introduce mlx5r_cache_rb_key")
-Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
-Link: https://lore.kernel.org/r/7c5613458ecb89fbe5606b7aa4c8d990bdea5b9a.1716900410.git.leon@kernel.org
-Signed-off-by: Leon Romanovsky <leon@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/hw/mlx5/mr.c |    1 +
- 1 file changed, 1 insertion(+)
-
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -243,6 +243,7 @@ static void set_cache_mkc(struct mlx5_ca
-       MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3);
-       MLX5_SET(mkc, mkc, access_mode_4_2,
-               (ent->rb_key.access_mode >> 2) & 0x7);
-+      MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats);
-       MLX5_SET(mkc, mkc, translations_octword_size,
-                get_mkc_octo_size(ent->rb_key.access_mode,
diff --git a/queue-6.1/rdma-mlx5-implement-mkeys-management-via-lifo-queue.patch b/queue-6.1/rdma-mlx5-implement-mkeys-management-via-lifo-queue.patch
deleted file mode 100644 (file)
index c89fbf4..0000000
+++ /dev/null
@@ -1,704 +0,0 @@
-From 73daa66bd410fa9662f7e4578ac5b58338c23b31 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Thu, 21 Sep 2023 11:07:16 +0300
-Subject: RDMA/mlx5: Implement mkeys management via LIFO queue
-
-From: Shay Drory <shayd@nvidia.com>
-
-[ Upstream commit 57e7071683ef6148c9f5ea0ba84598d2ba681375 ]
-
-Currently, mkeys are managed via an xarray. This implementation leads to
-a degradation in cases where many MRs are deregistered in parallel, due to the
-xarray's internal implementation; for example, deregistering 1M MRs via 64
-threads takes ~15% more time [1].
-
-Hence, implement mkey management via a LIFO queue, which solves the
-degradation.
-
-[1]
-2.8us in kernel v5.19 compared to 3.2us in kernel v6.4
-
-Signed-off-by: Shay Drory <shayd@nvidia.com>
-Link: https://lore.kernel.org/r/fde3d4cfab0f32f0ccb231cd113298256e1502c5.1695283384.git.leon@kernel.org
-Signed-off-by: Leon Romanovsky <leon@kernel.org>
-Stable-dep-of: d97505baea64 ("RDMA/mlx5: Fix the recovery flow of the UMR QP")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/infiniband/hw/mlx5/mlx5_ib.h |  21 +-
- drivers/infiniband/hw/mlx5/mr.c      | 324 ++++++++++++---------------
- drivers/infiniband/hw/mlx5/umr.c     |   4 +-
- 3 files changed, 169 insertions(+), 180 deletions(-)
-
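
The data structure being introduced is a plain LIFO of mkeys kept in fixed-size pages. A standalone userspace sketch of the same push/pop logic (tiny page size, singly linked tail chain and names chosen purely for illustration):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define MKEYS_PER_PAGE 4        /* the driver sizes a page to PAGE_SIZE */

    struct mkeys_page {
            uint32_t mkeys[MKEYS_PER_PAGE];
            struct mkeys_page *prev;
    };

    struct mkeys_queue {
            struct mkeys_page *tail;        /* only the tail page is partially filled */
            unsigned long num_pages;
            unsigned long ci;               /* total number of stored mkeys */
    };

    static int push_mkey(struct mkeys_queue *q, uint32_t mkey)
    {
            if (q->ci >= q->num_pages * MKEYS_PER_PAGE) {
                    struct mkeys_page *page = calloc(1, sizeof(*page));

                    if (!page)
                            return -1;
                    page->prev = q->tail;
                    q->tail = page;
                    q->num_pages++;
            }
            q->tail->mkeys[q->ci % MKEYS_PER_PAGE] = mkey;
            q->ci++;
            return 0;
    }

    static uint32_t pop_mkey(struct mkeys_queue *q)
    {
            unsigned long idx = (q->ci - 1) % MKEYS_PER_PAGE;
            uint32_t mkey = q->tail->mkeys[idx];

            q->ci--;
            if (q->num_pages > 1 && idx == 0) {     /* tail page emptied, free it */
                    struct mkeys_page *old = q->tail;

                    q->tail = old->prev;
                    q->num_pages--;
                    free(old);
            }
            return mkey;
    }

    int main(void)
    {
            struct mkeys_queue q = { .tail = calloc(1, sizeof(*q.tail)), .num_pages = 1 };
            uint32_t i;

            if (!q.tail)
                    return 1;
            for (i = 1; i <= 10; i++)
                    push_mkey(&q, i);
            while (q.ci)
                    printf("%u ", pop_mkey(&q));    /* prints 10 .. 1: LIFO order */
            printf("\n");
            free(q.tail);
            return 0;
    }
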
-diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-index 7c72e0e9db54a..024d2071c6a5d 100644
---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
-+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-@@ -760,10 +760,25 @@ struct umr_common {
-       unsigned int state;
- };
-+#define NUM_MKEYS_PER_PAGE \
-+      ((PAGE_SIZE - sizeof(struct list_head)) / sizeof(u32))
-+
-+struct mlx5_mkeys_page {
-+      u32 mkeys[NUM_MKEYS_PER_PAGE];
-+      struct list_head list;
-+};
-+static_assert(sizeof(struct mlx5_mkeys_page) == PAGE_SIZE);
-+
-+struct mlx5_mkeys_queue {
-+      struct list_head pages_list;
-+      u32 num_pages;
-+      unsigned long ci;
-+      spinlock_t lock; /* sync list ops */
-+};
-+
- struct mlx5_cache_ent {
--      struct xarray           mkeys;
--      unsigned long           stored;
--      unsigned long           reserved;
-+      struct mlx5_mkeys_queue mkeys_queue;
-+      u32                     pending;
-       char                    name[4];
-diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
-index 2c1a935734273..b66b8346c2dc6 100644
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -140,110 +140,47 @@ static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out)
-       mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out);
- }
--static int push_mkey_locked(struct mlx5_cache_ent *ent, bool limit_pendings,
--                          void *to_store)
-+static int push_mkey_locked(struct mlx5_cache_ent *ent, u32 mkey)
- {
--      XA_STATE(xas, &ent->mkeys, 0);
--      void *curr;
-+      unsigned long tmp = ent->mkeys_queue.ci % NUM_MKEYS_PER_PAGE;
-+      struct mlx5_mkeys_page *page;
--      if (limit_pendings &&
--          (ent->reserved - ent->stored) > MAX_PENDING_REG_MR)
--              return -EAGAIN;
--
--      while (1) {
--              /*
--               * This is cmpxchg (NULL, XA_ZERO_ENTRY) however this version
--               * doesn't transparently unlock. Instead we set the xas index to
--               * the current value of reserved every iteration.
--               */
--              xas_set(&xas, ent->reserved);
--              curr = xas_load(&xas);
--              if (!curr) {
--                      if (to_store && ent->stored == ent->reserved)
--                              xas_store(&xas, to_store);
--                      else
--                              xas_store(&xas, XA_ZERO_ENTRY);
--                      if (xas_valid(&xas)) {
--                              ent->reserved++;
--                              if (to_store) {
--                                      if (ent->stored != ent->reserved)
--                                              __xa_store(&ent->mkeys,
--                                                         ent->stored,
--                                                         to_store,
--                                                         GFP_KERNEL);
--                                      ent->stored++;
--                                      queue_adjust_cache_locked(ent);
--                                      WRITE_ONCE(ent->dev->cache.last_add,
--                                                 jiffies);
--                              }
--                      }
--              }
--              xa_unlock_irq(&ent->mkeys);
--
--              /*
--               * Notice xas_nomem() must always be called as it cleans
--               * up any cached allocation.
--               */
--              if (!xas_nomem(&xas, GFP_KERNEL))
--                      break;
--              xa_lock_irq(&ent->mkeys);
-+      lockdep_assert_held(&ent->mkeys_queue.lock);
-+      if (ent->mkeys_queue.ci >=
-+          ent->mkeys_queue.num_pages * NUM_MKEYS_PER_PAGE) {
-+              page = kzalloc(sizeof(*page), GFP_ATOMIC);
-+              if (!page)
-+                      return -ENOMEM;
-+              ent->mkeys_queue.num_pages++;
-+              list_add_tail(&page->list, &ent->mkeys_queue.pages_list);
-+      } else {
-+              page = list_last_entry(&ent->mkeys_queue.pages_list,
-+                                     struct mlx5_mkeys_page, list);
-       }
--      xa_lock_irq(&ent->mkeys);
--      if (xas_error(&xas))
--              return xas_error(&xas);
--      if (WARN_ON(curr))
--              return -EINVAL;
--      return 0;
--}
--
--static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
--                   void *to_store)
--{
--      int ret;
--
--      xa_lock_irq(&ent->mkeys);
--      ret = push_mkey_locked(ent, limit_pendings, to_store);
--      xa_unlock_irq(&ent->mkeys);
--      return ret;
--}
--
--static void undo_push_reserve_mkey(struct mlx5_cache_ent *ent)
--{
--      void *old;
--
--      ent->reserved--;
--      old = __xa_erase(&ent->mkeys, ent->reserved);
--      WARN_ON(old);
--}
--
--static void push_to_reserved(struct mlx5_cache_ent *ent, u32 mkey)
--{
--      void *old;
--      old = __xa_store(&ent->mkeys, ent->stored, xa_mk_value(mkey), 0);
--      WARN_ON(old);
--      ent->stored++;
-+      page->mkeys[tmp] = mkey;
-+      ent->mkeys_queue.ci++;
-+      return 0;
- }
--static u32 pop_stored_mkey(struct mlx5_cache_ent *ent)
-+static int pop_mkey_locked(struct mlx5_cache_ent *ent)
- {
--      void *old, *xa_mkey;
--
--      ent->stored--;
--      ent->reserved--;
-+      unsigned long tmp = (ent->mkeys_queue.ci - 1) % NUM_MKEYS_PER_PAGE;
-+      struct mlx5_mkeys_page *last_page;
-+      u32 mkey;
--      if (ent->stored == ent->reserved) {
--              xa_mkey = __xa_erase(&ent->mkeys, ent->stored);
--              WARN_ON(!xa_mkey);
--              return (u32)xa_to_value(xa_mkey);
-+      lockdep_assert_held(&ent->mkeys_queue.lock);
-+      last_page = list_last_entry(&ent->mkeys_queue.pages_list,
-+                                  struct mlx5_mkeys_page, list);
-+      mkey = last_page->mkeys[tmp];
-+      last_page->mkeys[tmp] = 0;
-+      ent->mkeys_queue.ci--;
-+      if (ent->mkeys_queue.num_pages > 1 && !tmp) {
-+              list_del(&last_page->list);
-+              ent->mkeys_queue.num_pages--;
-+              kfree(last_page);
-       }
--
--      xa_mkey = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY,
--                           GFP_KERNEL);
--      WARN_ON(!xa_mkey || xa_is_err(xa_mkey));
--      old = __xa_erase(&ent->mkeys, ent->reserved);
--      WARN_ON(old);
--      return (u32)xa_to_value(xa_mkey);
-+      return mkey;
- }
- static void create_mkey_callback(int status, struct mlx5_async_work *context)
-@@ -257,10 +194,10 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
-       if (status) {
-               create_mkey_warn(dev, status, mkey_out->out);
-               kfree(mkey_out);
--              xa_lock_irqsave(&ent->mkeys, flags);
--              undo_push_reserve_mkey(ent);
-+              spin_lock_irqsave(&ent->mkeys_queue.lock, flags);
-+              ent->pending--;
-               WRITE_ONCE(dev->fill_delay, 1);
--              xa_unlock_irqrestore(&ent->mkeys, flags);
-+              spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags);
-               mod_timer(&dev->delay_timer, jiffies + HZ);
-               return;
-       }
-@@ -269,11 +206,12 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
-               MLX5_GET(create_mkey_out, mkey_out->out, mkey_index));
-       WRITE_ONCE(dev->cache.last_add, jiffies);
--      xa_lock_irqsave(&ent->mkeys, flags);
--      push_to_reserved(ent, mkey_out->mkey);
-+      spin_lock_irqsave(&ent->mkeys_queue.lock, flags);
-+      push_mkey_locked(ent, mkey_out->mkey);
-       /* If we are doing fill_to_high_water then keep going. */
-       queue_adjust_cache_locked(ent);
--      xa_unlock_irqrestore(&ent->mkeys, flags);
-+      ent->pending--;
-+      spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags);
-       kfree(mkey_out);
- }
-@@ -329,24 +267,28 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
-               set_cache_mkc(ent, mkc);
-               async_create->ent = ent;
--              err = push_mkey(ent, true, NULL);
--              if (err)
-+              spin_lock_irq(&ent->mkeys_queue.lock);
-+              if (ent->pending >= MAX_PENDING_REG_MR) {
-+                      err = -EAGAIN;
-                       goto free_async_create;
-+              }
-+              ent->pending++;
-+              spin_unlock_irq(&ent->mkeys_queue.lock);
-               err = mlx5_ib_create_mkey_cb(async_create);
-               if (err) {
-                       mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
--                      goto err_undo_reserve;
-+                      goto err_create_mkey;
-               }
-       }
-       return 0;
--err_undo_reserve:
--      xa_lock_irq(&ent->mkeys);
--      undo_push_reserve_mkey(ent);
--      xa_unlock_irq(&ent->mkeys);
-+err_create_mkey:
-+      spin_lock_irq(&ent->mkeys_queue.lock);
-+      ent->pending--;
- free_async_create:
-+      spin_unlock_irq(&ent->mkeys_queue.lock);
-       kfree(async_create);
-       return err;
- }
-@@ -379,36 +321,36 @@ static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
- {
-       u32 mkey;
--      lockdep_assert_held(&ent->mkeys.xa_lock);
--      if (!ent->stored)
-+      lockdep_assert_held(&ent->mkeys_queue.lock);
-+      if (!ent->mkeys_queue.ci)
-               return;
--      mkey = pop_stored_mkey(ent);
--      xa_unlock_irq(&ent->mkeys);
-+      mkey = pop_mkey_locked(ent);
-+      spin_unlock_irq(&ent->mkeys_queue.lock);
-       mlx5_core_destroy_mkey(ent->dev->mdev, mkey);
--      xa_lock_irq(&ent->mkeys);
-+      spin_lock_irq(&ent->mkeys_queue.lock);
- }
- static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
-                               bool limit_fill)
--       __acquires(&ent->mkeys) __releases(&ent->mkeys)
-+      __acquires(&ent->mkeys_queue.lock) __releases(&ent->mkeys_queue.lock)
- {
-       int err;
--      lockdep_assert_held(&ent->mkeys.xa_lock);
-+      lockdep_assert_held(&ent->mkeys_queue.lock);
-       while (true) {
-               if (limit_fill)
-                       target = ent->limit * 2;
--              if (target == ent->reserved)
-+              if (target == ent->pending + ent->mkeys_queue.ci)
-                       return 0;
--              if (target > ent->reserved) {
--                      u32 todo = target - ent->reserved;
-+              if (target > ent->pending + ent->mkeys_queue.ci) {
-+                      u32 todo = target - (ent->pending + ent->mkeys_queue.ci);
--                      xa_unlock_irq(&ent->mkeys);
-+                      spin_unlock_irq(&ent->mkeys_queue.lock);
-                       err = add_keys(ent, todo);
-                       if (err == -EAGAIN)
-                               usleep_range(3000, 5000);
--                      xa_lock_irq(&ent->mkeys);
-+                      spin_lock_irq(&ent->mkeys_queue.lock);
-                       if (err) {
-                               if (err != -EAGAIN)
-                                       return err;
-@@ -436,7 +378,7 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
-        * cannot free MRs that are in use. Compute the target value for stored
-        * mkeys.
-        */
--      xa_lock_irq(&ent->mkeys);
-+      spin_lock_irq(&ent->mkeys_queue.lock);
-       if (target < ent->in_use) {
-               err = -EINVAL;
-               goto err_unlock;
-@@ -449,12 +391,12 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
-       err = resize_available_mrs(ent, target, false);
-       if (err)
-               goto err_unlock;
--      xa_unlock_irq(&ent->mkeys);
-+      spin_unlock_irq(&ent->mkeys_queue.lock);
-       return count;
- err_unlock:
--      xa_unlock_irq(&ent->mkeys);
-+      spin_unlock_irq(&ent->mkeys_queue.lock);
-       return err;
- }
-@@ -465,7 +407,8 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
-       char lbuf[20];
-       int err;
--      err = snprintf(lbuf, sizeof(lbuf), "%ld\n", ent->stored + ent->in_use);
-+      err = snprintf(lbuf, sizeof(lbuf), "%ld\n",
-+                     ent->mkeys_queue.ci + ent->in_use);
-       if (err < 0)
-               return err;
-@@ -494,10 +437,10 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
-        * Upon set we immediately fill the cache to high water mark implied by
-        * the limit.
-        */
--      xa_lock_irq(&ent->mkeys);
-+      spin_lock_irq(&ent->mkeys_queue.lock);
-       ent->limit = var;
-       err = resize_available_mrs(ent, 0, true);
--      xa_unlock_irq(&ent->mkeys);
-+      spin_unlock_irq(&ent->mkeys_queue.lock);
-       if (err)
-               return err;
-       return count;
-@@ -533,9 +476,9 @@ static bool someone_adding(struct mlx5_mkey_cache *cache)
-       mutex_lock(&cache->rb_lock);
-       for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) {
-               ent = rb_entry(node, struct mlx5_cache_ent, node);
--              xa_lock_irq(&ent->mkeys);
--              ret = ent->stored < ent->limit;
--              xa_unlock_irq(&ent->mkeys);
-+              spin_lock_irq(&ent->mkeys_queue.lock);
-+              ret = ent->mkeys_queue.ci < ent->limit;
-+              spin_unlock_irq(&ent->mkeys_queue.lock);
-               if (ret) {
-                       mutex_unlock(&cache->rb_lock);
-                       return true;
-@@ -552,26 +495,26 @@ static bool someone_adding(struct mlx5_mkey_cache *cache)
-  */
- static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
- {
--      lockdep_assert_held(&ent->mkeys.xa_lock);
-+      lockdep_assert_held(&ent->mkeys_queue.lock);
-       if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp)
-               return;
--      if (ent->stored < ent->limit) {
-+      if (ent->mkeys_queue.ci < ent->limit) {
-               ent->fill_to_high_water = true;
-               mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
-       } else if (ent->fill_to_high_water &&
--                 ent->reserved < 2 * ent->limit) {
-+                 ent->mkeys_queue.ci + ent->pending < 2 * ent->limit) {
-               /*
-                * Once we start populating due to hitting a low water mark
-                * continue until we pass the high water mark.
-                */
-               mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
--      } else if (ent->stored == 2 * ent->limit) {
-+      } else if (ent->mkeys_queue.ci == 2 * ent->limit) {
-               ent->fill_to_high_water = false;
--      } else if (ent->stored > 2 * ent->limit) {
-+      } else if (ent->mkeys_queue.ci > 2 * ent->limit) {
-               /* Queue deletion of excess entries */
-               ent->fill_to_high_water = false;
--              if (ent->stored != ent->reserved)
-+              if (ent->pending)
-                       queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
-                                          msecs_to_jiffies(1000));
-               else
-@@ -585,15 +528,16 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
-       struct mlx5_mkey_cache *cache = &dev->cache;
-       int err;
--      xa_lock_irq(&ent->mkeys);
-+      spin_lock_irq(&ent->mkeys_queue.lock);
-       if (ent->disabled)
-               goto out;
--      if (ent->fill_to_high_water && ent->reserved < 2 * ent->limit &&
-+      if (ent->fill_to_high_water &&
-+          ent->mkeys_queue.ci + ent->pending < 2 * ent->limit &&
-           !READ_ONCE(dev->fill_delay)) {
--              xa_unlock_irq(&ent->mkeys);
-+              spin_unlock_irq(&ent->mkeys_queue.lock);
-               err = add_keys(ent, 1);
--              xa_lock_irq(&ent->mkeys);
-+              spin_lock_irq(&ent->mkeys_queue.lock);
-               if (ent->disabled)
-                       goto out;
-               if (err) {
-@@ -611,7 +555,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
-                                                  msecs_to_jiffies(1000));
-                       }
-               }
--      } else if (ent->stored > 2 * ent->limit) {
-+      } else if (ent->mkeys_queue.ci > 2 * ent->limit) {
-               bool need_delay;
-               /*
-@@ -626,11 +570,11 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
-                * the garbage collection work to try to run in next cycle, in
-                * order to free CPU resources to other tasks.
-                */
--              xa_unlock_irq(&ent->mkeys);
-+              spin_unlock_irq(&ent->mkeys_queue.lock);
-               need_delay = need_resched() || someone_adding(cache) ||
-                            !time_after(jiffies,
-                                        READ_ONCE(cache->last_add) + 300 * HZ);
--              xa_lock_irq(&ent->mkeys);
-+              spin_lock_irq(&ent->mkeys_queue.lock);
-               if (ent->disabled)
-                       goto out;
-               if (need_delay) {
-@@ -641,7 +585,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
-               queue_adjust_cache_locked(ent);
-       }
- out:
--      xa_unlock_irq(&ent->mkeys);
-+      spin_unlock_irq(&ent->mkeys_queue.lock);
- }
- static void delayed_cache_work_func(struct work_struct *work)
-@@ -749,25 +693,25 @@ static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
-       if (!mr)
-               return ERR_PTR(-ENOMEM);
--      xa_lock_irq(&ent->mkeys);
-+      spin_lock_irq(&ent->mkeys_queue.lock);
-       ent->in_use++;
--      if (!ent->stored) {
-+      if (!ent->mkeys_queue.ci) {
-               queue_adjust_cache_locked(ent);
-               ent->miss++;
--              xa_unlock_irq(&ent->mkeys);
-+              spin_unlock_irq(&ent->mkeys_queue.lock);
-               err = create_cache_mkey(ent, &mr->mmkey.key);
-               if (err) {
--                      xa_lock_irq(&ent->mkeys);
-+                      spin_lock_irq(&ent->mkeys_queue.lock);
-                       ent->in_use--;
--                      xa_unlock_irq(&ent->mkeys);
-+                      spin_unlock_irq(&ent->mkeys_queue.lock);
-                       kfree(mr);
-                       return ERR_PTR(err);
-               }
-       } else {
--              mr->mmkey.key = pop_stored_mkey(ent);
-+              mr->mmkey.key = pop_mkey_locked(ent);
-               queue_adjust_cache_locked(ent);
--              xa_unlock_irq(&ent->mkeys);
-+              spin_unlock_irq(&ent->mkeys_queue.lock);
-       }
-       mr->mmkey.cache_ent = ent;
-       mr->mmkey.type = MLX5_MKEY_MR;
-@@ -820,14 +764,14 @@ static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
-       u32 mkey;
-       cancel_delayed_work(&ent->dwork);
--      xa_lock_irq(&ent->mkeys);
--      while (ent->stored) {
--              mkey = pop_stored_mkey(ent);
--              xa_unlock_irq(&ent->mkeys);
-+      spin_lock_irq(&ent->mkeys_queue.lock);
-+      while (ent->mkeys_queue.ci) {
-+              mkey = pop_mkey_locked(ent);
-+              spin_unlock_irq(&ent->mkeys_queue.lock);
-               mlx5_core_destroy_mkey(dev->mdev, mkey);
--              xa_lock_irq(&ent->mkeys);
-+              spin_lock_irq(&ent->mkeys_queue.lock);
-       }
--      xa_unlock_irq(&ent->mkeys);
-+      spin_unlock_irq(&ent->mkeys_queue.lock);
- }
- static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
-@@ -852,7 +796,7 @@ static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev,
-       dir = debugfs_create_dir(ent->name, dev->cache.fs_root);
-       debugfs_create_file("size", 0600, dir, ent, &size_fops);
-       debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
--      debugfs_create_ulong("cur", 0400, dir, &ent->stored);
-+      debugfs_create_ulong("cur", 0400, dir, &ent->mkeys_queue.ci);
-       debugfs_create_u32("miss", 0600, dir, &ent->miss);
- }
-@@ -874,6 +818,31 @@ static void delay_time_func(struct timer_list *t)
-       WRITE_ONCE(dev->fill_delay, 0);
- }
-+static int mlx5r_mkeys_init(struct mlx5_cache_ent *ent)
-+{
-+      struct mlx5_mkeys_page *page;
-+
-+      page = kzalloc(sizeof(*page), GFP_KERNEL);
-+      if (!page)
-+              return -ENOMEM;
-+      INIT_LIST_HEAD(&ent->mkeys_queue.pages_list);
-+      spin_lock_init(&ent->mkeys_queue.lock);
-+      list_add_tail(&page->list, &ent->mkeys_queue.pages_list);
-+      ent->mkeys_queue.num_pages++;
-+      return 0;
-+}
-+
-+static void mlx5r_mkeys_uninit(struct mlx5_cache_ent *ent)
-+{
-+      struct mlx5_mkeys_page *page;
-+
-+      WARN_ON(ent->mkeys_queue.ci || ent->mkeys_queue.num_pages > 1);
-+      page = list_last_entry(&ent->mkeys_queue.pages_list,
-+                             struct mlx5_mkeys_page, list);
-+      list_del(&page->list);
-+      kfree(page);
-+}
-+
- struct mlx5_cache_ent *
- mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
-                             struct mlx5r_cache_rb_key rb_key,
-@@ -887,7 +856,9 @@ mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
-       if (!ent)
-               return ERR_PTR(-ENOMEM);
--      xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
-+      ret = mlx5r_mkeys_init(ent);
-+      if (ret)
-+              goto mkeys_err;
-       ent->rb_key = rb_key;
-       ent->dev = dev;
-       ent->is_tmp = !persistent_entry;
-@@ -895,10 +866,8 @@ mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
-       INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
-       ret = mlx5_cache_ent_insert(&dev->cache, ent);
--      if (ret) {
--              kfree(ent);
--              return ERR_PTR(ret);
--      }
-+      if (ret)
-+              goto ent_insert_err;
-       if (persistent_entry) {
-               if (rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM)
-@@ -921,6 +890,11 @@ mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
-       }
-       return ent;
-+ent_insert_err:
-+      mlx5r_mkeys_uninit(ent);
-+mkeys_err:
-+      kfree(ent);
-+      return ERR_PTR(ret);
- }
- static void remove_ent_work_func(struct work_struct *work)
-@@ -938,13 +912,13 @@ static void remove_ent_work_func(struct work_struct *work)
-               cur = rb_prev(cur);
-               mutex_unlock(&cache->rb_lock);
--              xa_lock_irq(&ent->mkeys);
-+              spin_lock_irq(&ent->mkeys_queue.lock);
-               if (!ent->is_tmp) {
--                      xa_unlock_irq(&ent->mkeys);
-+                      spin_unlock_irq(&ent->mkeys_queue.lock);
-                       mutex_lock(&cache->rb_lock);
-                       continue;
-               }
--              xa_unlock_irq(&ent->mkeys);
-+              spin_unlock_irq(&ent->mkeys_queue.lock);
-               clean_keys(ent->dev, ent);
-               mutex_lock(&cache->rb_lock);
-@@ -994,9 +968,9 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
-       mutex_unlock(&cache->rb_lock);
-       for (node = rb_first(root); node; node = rb_next(node)) {
-               ent = rb_entry(node, struct mlx5_cache_ent, node);
--              xa_lock_irq(&ent->mkeys);
-+              spin_lock_irq(&ent->mkeys_queue.lock);
-               queue_adjust_cache_locked(ent);
--              xa_unlock_irq(&ent->mkeys);
-+              spin_unlock_irq(&ent->mkeys_queue.lock);
-       }
-       return 0;
-@@ -1020,9 +994,9 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
-       mutex_lock(&dev->cache.rb_lock);
-       for (node = rb_first(root); node; node = rb_next(node)) {
-               ent = rb_entry(node, struct mlx5_cache_ent, node);
--              xa_lock_irq(&ent->mkeys);
-+              spin_lock_irq(&ent->mkeys_queue.lock);
-               ent->disabled = true;
--              xa_unlock_irq(&ent->mkeys);
-+              spin_unlock_irq(&ent->mkeys_queue.lock);
-               cancel_delayed_work_sync(&ent->dwork);
-       }
-@@ -1035,6 +1009,7 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
-               node = rb_next(node);
-               clean_keys(dev, ent);
-               rb_erase(&ent->node, root);
-+              mlx5r_mkeys_uninit(ent);
-               kfree(ent);
-       }
-       mutex_unlock(&dev->cache.rb_lock);
-@@ -1802,7 +1777,7 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
-       int ret;
-       if (mr->mmkey.cache_ent) {
--              xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
-+              spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
-               mr->mmkey.cache_ent->in_use--;
-               goto end;
-       }
-@@ -1816,7 +1791,7 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
-                               return -EOPNOTSUPP;
-                       }
-                       mr->mmkey.cache_ent = ent;
--                      xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
-+                      spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
-                       mutex_unlock(&cache->rb_lock);
-                       goto end;
-               }
-@@ -1828,12 +1803,11 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
-               return PTR_ERR(ent);
-       mr->mmkey.cache_ent = ent;
--      xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
-+      spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
- end:
--      ret = push_mkey_locked(mr->mmkey.cache_ent, false,
--                             xa_mk_value(mr->mmkey.key));
--      xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
-+      ret = push_mkey_locked(mr->mmkey.cache_ent, mr->mmkey.key);
-+      spin_unlock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
-       return ret;
- }
-diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
-index cb5cee3dee2b6..fa000182d0b41 100644
---- a/drivers/infiniband/hw/mlx5/umr.c
-+++ b/drivers/infiniband/hw/mlx5/umr.c
-@@ -332,8 +332,8 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
-               WARN_ON_ONCE(1);
-               mlx5_ib_warn(dev,
--                      "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs\n",
--                      umr_context.status);
-+                      "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n",
-+                      umr_context.status, mkey);
-               mutex_lock(&umrc->lock);
-               err = mlx5r_umr_recover(dev);
-               mutex_unlock(&umrc->lock);
--- 
-2.39.5
-
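The hunks above replace the xarray-based stored/reserved bookkeeping with a paged queue of mkeys protected by a spinlock. As a rough illustration of that data structure only, here is a minimal, self-contained userspace sketch; the names, the page capacity of 4, and the singly linked prev pointer are assumptions for brevity, not the driver's actual layout:

#include <stdio.h>
#include <stdlib.h>

/* Minimal userspace model of a paged LIFO queue of 32-bit keys.
 * KEYS_PER_PAGE is an assumption; the driver derives it from PAGE_SIZE. */
#define KEYS_PER_PAGE 4

struct key_page {
	struct key_page *prev;          /* previously allocated page */
	unsigned int keys[KEYS_PER_PAGE];
};

struct key_queue {
	struct key_page *last;          /* page holding the top of the stack */
	unsigned long ci;               /* total number of stored keys */
	unsigned long num_pages;
};

static int push_key(struct key_queue *q, unsigned int key)
{
	unsigned long slot = q->ci % KEYS_PER_PAGE;

	if (q->ci >= q->num_pages * KEYS_PER_PAGE) {
		/* Top page is full (or the queue is empty): grow by one page. */
		struct key_page *page = calloc(1, sizeof(*page));

		if (!page)
			return -1;
		page->prev = q->last;
		q->last = page;
		q->num_pages++;
	}
	q->last->keys[slot] = key;
	q->ci++;
	return 0;
}

static int pop_key(struct key_queue *q, unsigned int *key)
{
	unsigned long slot;

	if (!q->ci)
		return -1;
	slot = (q->ci - 1) % KEYS_PER_PAGE;
	*key = q->last->keys[slot];
	q->ci--;
	/* Free a page once it is empty, but always keep one around. */
	if (q->num_pages > 1 && slot == 0) {
		struct key_page *empty = q->last;

		q->last = empty->prev;
		q->num_pages--;
		free(empty);
	}
	return 0;
}

int main(void)
{
	struct key_queue q = { 0 };
	unsigned int k;

	for (unsigned int i = 1; i <= 10; i++)
		push_key(&q, i);
	while (!pop_key(&q, &k))
		printf("%u\n", k);      /* pops 10..1, LIFO order */
	free(q.last);
	return 0;
}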
diff --git a/queue-6.1/rdma-mlx5-introduce-mlx5r_cache_rb_key.patch b/queue-6.1/rdma-mlx5-introduce-mlx5r_cache_rb_key.patch
deleted file mode 100644 (file)
index 21bcc75..0000000
+++ /dev/null
@@ -1,565 +0,0 @@
-From dee0c2d2ab0dbb79d87e227f8b4136f1764cefb4 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Thu, 26 Jan 2023 00:28:05 +0200
-Subject: RDMA/mlx5: Introduce mlx5r_cache_rb_key
-
-From: Michael Guralnik <michaelgur@nvidia.com>
-
-[ Upstream commit 73d09b2fe8336f5f37935e46418666ddbcd3c343 ]
-
-Switch from using the mkey order to using the new struct as the key to the
-RB tree of cache entries.
-
-The key is all the mkey properties that UMR operations can't modify.
-This key is used to define the cache entries and to search for and
-create cache mkeys.
-
-Link: https://lore.kernel.org/r/20230125222807.6921-5-michaelgur@nvidia.com
-Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
-Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-Stable-dep-of: d97505baea64 ("RDMA/mlx5: Fix the recovery flow of the UMR QP")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/infiniband/hw/mlx5/mlx5_ib.h |  27 ++--
- drivers/infiniband/hw/mlx5/mr.c      | 228 +++++++++++++++++++--------
- drivers/infiniband/hw/mlx5/odp.c     |  30 ++--
- 3 files changed, 201 insertions(+), 84 deletions(-)
-
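To illustrate the key described above (all the properties UMR can't modify, with ndescs compared last so a search can return the closest size among entries whose other properties match exactly), here is a small self-contained sketch. The struct name, the linear scan standing in for the rb-tree walk of mkey_cache_ent_from_rb_key(), and the sample values are assumptions for the demo:

#include <stdio.h>

/* Model of the cache rb_key: properties UMR cannot change, plus ndescs. */
struct key {
	unsigned char ats;
	unsigned int access_mode;
	unsigned int access_flags;
	unsigned int ndescs;            /* compared last: closest-size match */
};

static int key_cmp(const struct key *a, const struct key *b)
{
	if (a->ats != b->ats)
		return a->ats < b->ats ? -1 : 1;
	if (a->access_mode != b->access_mode)
		return a->access_mode < b->access_mode ? -1 : 1;
	if (a->access_flags != b->access_flags)
		return a->access_flags < b->access_flags ? -1 : 1;
	if (a->ndescs != b->ndescs)
		return a->ndescs < b->ndescs ? -1 : 1;
	return 0;
}

/* Find an exact match, or the smallest entry greater than the request
 * that still matches every property except ndescs. */
static const struct key *find_entry(const struct key *ents, int n,
				    const struct key *req)
{
	const struct key *best = NULL;

	for (int i = 0; i < n; i++) {
		int cmp = key_cmp(&ents[i], req);

		if (cmp == 0)
			return &ents[i];
		if (cmp > 0 && (!best || key_cmp(&ents[i], best) < 0))
			best = &ents[i];
	}
	if (best && best->ats == req->ats &&
	    best->access_mode == req->access_mode &&
	    best->access_flags == req->access_flags)
		return best;
	return NULL;
}

int main(void)
{
	struct key ents[] = {
		{ 0, 1, 0, 8 }, { 0, 1, 0, 16 }, { 0, 1, 0, 32 },
	};
	struct key req = { 0, 1, 0, 10 };
	const struct key *e = find_entry(ents, 3, &req);

	/* Prints 16: the smallest entry that can hold 10 descriptors. */
	printf("picked entry with ndescs=%u\n", e ? e->ndescs : 0);
	return 0;
}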
-diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-index bd998ac8c29c1..7c9d5648947e9 100644
---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
-+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-@@ -637,6 +637,13 @@ enum mlx5_mkey_type {
-       MLX5_MKEY_INDIRECT_DEVX,
- };
-+struct mlx5r_cache_rb_key {
-+      u8 ats:1;
-+      unsigned int access_mode;
-+      unsigned int access_flags;
-+      unsigned int ndescs;
-+};
-+
- struct mlx5_ib_mkey {
-       u32 key;
-       enum mlx5_mkey_type type;
-@@ -757,11 +764,9 @@ struct mlx5_cache_ent {
-       unsigned long           reserved;
-       char                    name[4];
--      u32                     order;
--      u32                     access_mode;
--      unsigned int            ndescs;
-       struct rb_node          node;
-+      struct mlx5r_cache_rb_key rb_key;
-       u8 disabled:1;
-       u8 fill_to_high_water:1;
-@@ -1340,14 +1345,13 @@ int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
- int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
- int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
- struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
--                                            int order);
-+                                            struct mlx5r_cache_rb_key rb_key,
-+                                            bool persistent_entry);
- struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
--                                     struct mlx5_cache_ent *ent,
--                                     int access_flags);
-+                                     int access_flags, int access_mode,
-+                                     int ndescs);
--struct mlx5_ib_mr *mlx5_mr_cache_alloc_order(struct mlx5_ib_dev *dev, u32 order,
--                                           int access_flags);
- int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
-                           struct ib_mr_status *mr_status);
- struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
-@@ -1370,7 +1374,7 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq);
- void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
- int __init mlx5_ib_odp_init(void);
- void mlx5_ib_odp_cleanup(void);
--void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent);
-+int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev);
- void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
-                          struct mlx5_ib_mr *mr, int flags);
-@@ -1389,7 +1393,10 @@ static inline int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev,
- static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
- static inline int mlx5_ib_odp_init(void) { return 0; }
- static inline void mlx5_ib_odp_cleanup(void)                              {}
--static inline void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) {}
-+static inline int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
-+{
-+      return 0;
-+}
- static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
-                                        struct mlx5_ib_mr *mr, int flags) {}
-diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
-index b3d83920d3cfb..1060b30a837a0 100644
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -292,11 +292,13 @@ static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
-       set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
-       MLX5_SET(mkc, mkc, free, 1);
-       MLX5_SET(mkc, mkc, umr_en, 1);
--      MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
--      MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
-+      MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3);
-+      MLX5_SET(mkc, mkc, access_mode_4_2,
-+              (ent->rb_key.access_mode >> 2) & 0x7);
-       MLX5_SET(mkc, mkc, translations_octword_size,
--               get_mkc_octo_size(ent->access_mode, ent->ndescs));
-+               get_mkc_octo_size(ent->rb_key.access_mode,
-+                                 ent->rb_key.ndescs));
-       MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
- }
-@@ -594,8 +596,8 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
-                       if (err != -EAGAIN) {
-                               mlx5_ib_warn(
-                                       dev,
--                                      "command failed order %d, err %d\n",
--                                      ent->order, err);
-+                                      "add keys command failed, err %d\n",
-+                                      err);
-                               queue_delayed_work(cache->wq, &ent->dwork,
-                                                  msecs_to_jiffies(1000));
-                       }
-@@ -641,22 +643,49 @@ static void delayed_cache_work_func(struct work_struct *work)
-       __cache_work_func(ent);
- }
-+static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1,
-+                           struct mlx5r_cache_rb_key key2)
-+{
-+      int res;
-+
-+      res = key1.ats - key2.ats;
-+      if (res)
-+              return res;
-+
-+      res = key1.access_mode - key2.access_mode;
-+      if (res)
-+              return res;
-+
-+      res = key1.access_flags - key2.access_flags;
-+      if (res)
-+              return res;
-+
-+      /*
-+       * keep ndescs the last in the compare table since the find function
-+       * searches for an exact match on all properties and only closest
-+       * match in size.
-+       */
-+      return key1.ndescs - key2.ndescs;
-+}
-+
- static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
-                                struct mlx5_cache_ent *ent)
- {
-       struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL;
-       struct mlx5_cache_ent *cur;
-+      int cmp;
-       mutex_lock(&cache->rb_lock);
-       /* Figure out where to put new node */
-       while (*new) {
-               cur = rb_entry(*new, struct mlx5_cache_ent, node);
-               parent = *new;
--              if (ent->order < cur->order)
-+              cmp = cache_ent_key_cmp(cur->rb_key, ent->rb_key);
-+              if (cmp > 0)
-                       new = &((*new)->rb_left);
--              if (ent->order > cur->order)
-+              if (cmp < 0)
-                       new = &((*new)->rb_right);
--              if (ent->order == cur->order) {
-+              if (cmp == 0) {
-                       mutex_unlock(&cache->rb_lock);
-                       return -EEXIST;
-               }
-@@ -670,40 +699,45 @@ static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
-       return 0;
- }
--static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev,
--                                                      unsigned int order)
-+static struct mlx5_cache_ent *
-+mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev,
-+                         struct mlx5r_cache_rb_key rb_key)
- {
-       struct rb_node *node = dev->cache.rb_root.rb_node;
-       struct mlx5_cache_ent *cur, *smallest = NULL;
-+      int cmp;
-       /*
-        * Find the smallest ent with order >= requested_order.
-        */
-       while (node) {
-               cur = rb_entry(node, struct mlx5_cache_ent, node);
--              if (cur->order > order) {
-+              cmp = cache_ent_key_cmp(cur->rb_key, rb_key);
-+              if (cmp > 0) {
-                       smallest = cur;
-                       node = node->rb_left;
-               }
--              if (cur->order < order)
-+              if (cmp < 0)
-                       node = node->rb_right;
--              if (cur->order == order)
-+              if (cmp == 0)
-                       return cur;
-       }
--      return smallest;
-+      return (smallest &&
-+              smallest->rb_key.access_mode == rb_key.access_mode &&
-+              smallest->rb_key.access_flags == rb_key.access_flags &&
-+              smallest->rb_key.ats == rb_key.ats) ?
-+                     smallest :
-+                     NULL;
- }
--struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
--                                     struct mlx5_cache_ent *ent,
--                                     int access_flags)
-+static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
-+                                      struct mlx5_cache_ent *ent,
-+                                      int access_flags)
- {
-       struct mlx5_ib_mr *mr;
-       int err;
--      if (!mlx5r_umr_can_reconfig(dev, 0, access_flags))
--              return ERR_PTR(-EOPNOTSUPP);
--
-       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-       if (!mr)
-               return ERR_PTR(-ENOMEM);
-@@ -734,12 +768,44 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
-       return mr;
- }
--struct mlx5_ib_mr *mlx5_mr_cache_alloc_order(struct mlx5_ib_dev *dev,
--                                           u32 order, int access_flags)
-+static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev,
-+                                       int access_flags)
-+{
-+      int ret = 0;
-+
-+      if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-+          MLX5_CAP_GEN(dev->mdev, atomic) &&
-+          MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
-+              ret |= IB_ACCESS_REMOTE_ATOMIC;
-+
-+      if ((access_flags & IB_ACCESS_RELAXED_ORDERING) &&
-+          MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
-+          !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
-+              ret |= IB_ACCESS_RELAXED_ORDERING;
-+
-+      if ((access_flags & IB_ACCESS_RELAXED_ORDERING) &&
-+          MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
-+          !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
-+              ret |= IB_ACCESS_RELAXED_ORDERING;
-+
-+      return ret;
-+}
-+
-+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
-+                                     int access_flags, int access_mode,
-+                                     int ndescs)
- {
--      struct mlx5_cache_ent *ent = mkey_cache_ent_from_order(dev, order);
-+      struct mlx5r_cache_rb_key rb_key = {
-+              .ndescs = ndescs,
-+              .access_mode = access_mode,
-+              .access_flags = get_unchangeable_access_flags(dev, access_flags)
-+      };
-+      struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key);
--      return mlx5_mr_cache_alloc(dev, ent, access_flags);
-+      if (!ent)
-+              return ERR_PTR(-EOPNOTSUPP);
-+
-+      return _mlx5_mr_cache_alloc(dev, ent, access_flags);
- }
- static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
-@@ -766,28 +832,32 @@ static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
-       dev->cache.fs_root = NULL;
- }
-+static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev,
-+                                          struct mlx5_cache_ent *ent)
-+{
-+      int order = order_base_2(ent->rb_key.ndescs);
-+      struct dentry *dir;
-+
-+      if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM)
-+              order = MLX5_IMR_KSM_CACHE_ENTRY + 2;
-+
-+      sprintf(ent->name, "%d", order);
-+      dir = debugfs_create_dir(ent->name, dev->cache.fs_root);
-+      debugfs_create_file("size", 0600, dir, ent, &size_fops);
-+      debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
-+      debugfs_create_ulong("cur", 0400, dir, &ent->stored);
-+      debugfs_create_u32("miss", 0600, dir, &ent->miss);
-+}
-+
- static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
- {
-+      struct dentry *dbg_root = mlx5_debugfs_get_dev_root(dev->mdev);
-       struct mlx5_mkey_cache *cache = &dev->cache;
--      struct mlx5_cache_ent *ent;
--      struct dentry *dir;
--      int i;
-       if (!mlx5_debugfs_root || dev->is_rep)
-               return;
--      dir = mlx5_debugfs_get_dev_root(dev->mdev);
--      cache->fs_root = debugfs_create_dir("mr_cache", dir);
--
--      for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
--              ent = mkey_cache_ent_from_order(dev, i);
--              sprintf(ent->name, "%d", ent->order);
--              dir = debugfs_create_dir(ent->name, cache->fs_root);
--              debugfs_create_file("size", 0600, dir, ent, &size_fops);
--              debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
--              debugfs_create_ulong("cur", 0400, dir, &ent->stored);
--              debugfs_create_u32("miss", 0600, dir, &ent->miss);
--      }
-+      cache->fs_root = debugfs_create_dir("mr_cache", dbg_root);
- }
- static void delay_time_func(struct timer_list *t)
-@@ -798,9 +868,11 @@ static void delay_time_func(struct timer_list *t)
- }
- struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
--                                            int order)
-+                                            struct mlx5r_cache_rb_key rb_key,
-+                                            bool persistent_entry)
- {
-       struct mlx5_cache_ent *ent;
-+      int order;
-       int ret;
-       ent = kzalloc(sizeof(*ent), GFP_KERNEL);
-@@ -808,7 +880,7 @@ struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
-               return ERR_PTR(-ENOMEM);
-       xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
--      ent->order = order;
-+      ent->rb_key = rb_key;
-       ent->dev = dev;
-       INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
-@@ -818,13 +890,36 @@ struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
-               kfree(ent);
-               return ERR_PTR(ret);
-       }
-+
-+      if (persistent_entry) {
-+              if (rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM)
-+                      order = MLX5_IMR_KSM_CACHE_ENTRY;
-+              else
-+                      order = order_base_2(rb_key.ndescs) - 2;
-+
-+              if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
-+                  !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
-+                  mlx5r_umr_can_load_pas(dev, 0))
-+                      ent->limit = dev->mdev->profile.mr_cache[order].limit;
-+              else
-+                      ent->limit = 0;
-+
-+              mlx5_mkey_cache_debugfs_add_ent(dev, ent);
-+      }
-+
-       return ent;
- }
- int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
- {
-       struct mlx5_mkey_cache *cache = &dev->cache;
-+      struct rb_root *root = &dev->cache.rb_root;
-+      struct mlx5r_cache_rb_key rb_key = {
-+              .access_mode = MLX5_MKC_ACCESS_MODE_MTT,
-+      };
-       struct mlx5_cache_ent *ent;
-+      struct rb_node *node;
-+      int ret;
-       int i;
-       mutex_init(&dev->slow_path_mutex);
-@@ -838,33 +933,32 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
-       mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
-       timer_setup(&dev->delay_timer, delay_time_func, 0);
--      for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
--              ent = mlx5r_cache_create_ent(dev, i);
--
--              if (i > MKEY_CACHE_LAST_STD_ENTRY) {
--                      mlx5_odp_init_mkey_cache_entry(ent);
--                      continue;
-+      mlx5_mkey_cache_debugfs_init(dev);
-+      for (i = 0; i <= mkey_cache_max_order(dev); i++) {
-+              rb_key.ndescs = 1 << (i + 2);
-+              ent = mlx5r_cache_create_ent(dev, rb_key, true);
-+              if (IS_ERR(ent)) {
-+                      ret = PTR_ERR(ent);
-+                      goto err;
-               }
-+      }
--              if (ent->order > mkey_cache_max_order(dev))
--                      continue;
-+      ret = mlx5_odp_init_mkey_cache(dev);
-+      if (ret)
-+              goto err;
--              ent->ndescs = 1 << ent->order;
--              ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
--              if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
--                  !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
--                  mlx5r_umr_can_load_pas(dev, 0))
--                      ent->limit = dev->mdev->profile.mr_cache[i].limit;
--              else
--                      ent->limit = 0;
-+      for (node = rb_first(root); node; node = rb_next(node)) {
-+              ent = rb_entry(node, struct mlx5_cache_ent, node);
-               xa_lock_irq(&ent->mkeys);
-               queue_adjust_cache_locked(ent);
-               xa_unlock_irq(&ent->mkeys);
-       }
--      mlx5_mkey_cache_debugfs_init(dev);
--
-       return 0;
-+
-+err:
-+      mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
-+      return ret;
- }
- int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
-@@ -965,7 +1059,7 @@ static int get_octo_len(u64 addr, u64 len, int page_shift)
- static int mkey_cache_max_order(struct mlx5_ib_dev *dev)
- {
-       if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
--              return MKEY_CACHE_LAST_STD_ENTRY + 2;
-+              return MKEY_CACHE_LAST_STD_ENTRY;
-       return MLX5_MAX_UMR_SHIFT;
- }
-@@ -995,6 +1089,9 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
-                                            struct ib_umem *umem, u64 iova,
-                                            int access_flags)
- {
-+      struct mlx5r_cache_rb_key rb_key = {
-+              .access_mode = MLX5_MKC_ACCESS_MODE_MTT,
-+      };
-       struct mlx5_ib_dev *dev = to_mdev(pd->device);
-       struct mlx5_cache_ent *ent;
-       struct mlx5_ib_mr *mr;
-@@ -1007,8 +1104,11 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
-                                                    0, iova);
-       if (WARN_ON(!page_size))
-               return ERR_PTR(-EINVAL);
--      ent = mkey_cache_ent_from_order(
--              dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
-+
-+      rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size);
-+      rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags);
-+      rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags);
-+      ent = mkey_cache_ent_from_rb_key(dev, rb_key);
-       /*
-        * Matches access in alloc_cache_mr(). If the MR can't come from the
-        * cache then synchronously create an uncached one.
-@@ -1022,7 +1122,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
-               return mr;
-       }
--      mr = mlx5_mr_cache_alloc(dev, ent, access_flags);
-+      mr = _mlx5_mr_cache_alloc(dev, ent, access_flags);
-       if (IS_ERR(mr))
-               return mr;
-@@ -1452,7 +1552,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
-               mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova);
-       if (WARN_ON(!*page_size))
-               return false;
--      return (1ULL << mr->mmkey.cache_ent->order) >=
-+      return (mr->mmkey.cache_ent->rb_key.ndescs) >=
-              ib_umem_num_dma_blocks(new_umem, *page_size);
- }
-diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
-index 7f68940ca0d1e..96d4faabbff8a 100644
---- a/drivers/infiniband/hw/mlx5/odp.c
-+++ b/drivers/infiniband/hw/mlx5/odp.c
-@@ -406,7 +406,6 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
- static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
-                                               unsigned long idx)
- {
--      int order = order_base_2(MLX5_IMR_MTT_ENTRIES);
-       struct mlx5_ib_dev *dev = mr_to_mdev(imr);
-       struct ib_umem_odp *odp;
-       struct mlx5_ib_mr *mr;
-@@ -419,8 +418,9 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
-       if (IS_ERR(odp))
-               return ERR_CAST(odp);
--      BUILD_BUG_ON(order > MKEY_CACHE_LAST_STD_ENTRY);
--      mr = mlx5_mr_cache_alloc_order(dev, order, imr->access_flags);
-+      mr = mlx5_mr_cache_alloc(dev, imr->access_flags,
-+                               MLX5_MKC_ACCESS_MODE_MTT,
-+                               MLX5_IMR_MTT_ENTRIES);
-       if (IS_ERR(mr)) {
-               ib_umem_odp_release(odp);
-               return mr;
-@@ -494,8 +494,8 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
-       if (IS_ERR(umem_odp))
-               return ERR_CAST(umem_odp);
--      imr = mlx5_mr_cache_alloc_order(dev, MLX5_IMR_KSM_CACHE_ENTRY,
--                                      access_flags);
-+      imr = mlx5_mr_cache_alloc(dev, access_flags, MLX5_MKC_ACCESS_MODE_KSM,
-+                                mlx5_imr_ksm_entries);
-       if (IS_ERR(imr)) {
-               ib_umem_odp_release(umem_odp);
-               return imr;
-@@ -1591,12 +1591,22 @@ mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
-       return err;
- }
--void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent)
-+int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
- {
--      if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
--              return;
--      ent->ndescs = mlx5_imr_ksm_entries;
--      ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
-+      struct mlx5r_cache_rb_key rb_key = {
-+              .access_mode = MLX5_MKC_ACCESS_MODE_KSM,
-+              .ndescs = mlx5_imr_ksm_entries,
-+      };
-+      struct mlx5_cache_ent *ent;
-+
-+      if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
-+              return 0;
-+
-+      ent = mlx5r_cache_create_ent(dev, rb_key, true);
-+      if (IS_ERR(ent))
-+              return PTR_ERR(ent);
-+
-+      return 0;
- }
- static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
--- 
-2.39.5
-
diff --git a/queue-6.1/rdma-mlx5-limit-usage-of-over-sized-mkeys-from-the-mr-cache.patch b/queue-6.1/rdma-mlx5-limit-usage-of-over-sized-mkeys-from-the-mr-cache.patch
deleted file mode 100644 (file)
index c68b5d0..0000000
+++ /dev/null
@@ -1,92 +0,0 @@
-From ee6d57a2e13d11ce9050cfc3e3b69ef707a44a63 Mon Sep 17 00:00:00 2001
-From: Michael Guralnik <michaelgur@nvidia.com>
-Date: Tue, 3 Sep 2024 14:24:49 +0300
-Subject: RDMA/mlx5: Limit usage of over-sized mkeys from the MR cache
-
-From: Michael Guralnik <michaelgur@nvidia.com>
-
-commit ee6d57a2e13d11ce9050cfc3e3b69ef707a44a63 upstream.
-
-When searching the MR cache for suitable cache entries, don't use mkeys
-larger than twice the size required for the MR.
-This should ensure the usage of mkeys closer to the minimal required size
-and reduce memory waste.
-
-On driver init we create entries for mkeys with clear attributes and
-powers of 2 sizes from 4 to the max supported size.
-This solves the issue for anyone using mkeys that fit these
-requirements.
-
-In the use case where an MR is registered with different attributes,
-such as an access flag that UMR can't modify, we'll create a new cache
-entry to store it upon dereg.
-Without this fix, any later registration with the same attributes and a
-smaller size will use the newly created cache entry and its mkeys,
-disregarding the memory waste of using mkeys larger than required.
-
-For example, one worst-case scenario is registering and deregistering a
-1GB mkey with ATS enabled, which causes the creation of a new cache
-entry to hold that type of mkey. A user registering a 4k MR with ATS
-will end up using the new cache entry and an mkey that can support a
-1GB MR, thus using roughly 250,000 times more HW memory than actually
-needed.
-
-Additionally, allow all small registrations to use the smallest-size
-cache entry that is initialized on driver load, even if its size is
-larger than twice the required size.
-
-Fixes: 73d09b2fe833 ("RDMA/mlx5: Introduce mlx5r_cache_rb_key")
-Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
-Link: https://patch.msgid.link/8ba3a6e3748aace2026de8b83da03aba084f78f4.1725362530.git.leon@kernel.org
-Signed-off-by: Leon Romanovsky <leon@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/hw/mlx5/mr.c |   14 ++++++++++++--
- 1 file changed, 12 insertions(+), 2 deletions(-)
-
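A small sketch of the size rule this patch describes, assuming the minimum of 4 descriptors and treating the check as a standalone predicate rather than part of the rb-tree search:

#include <stdio.h>

/* Sketch of the over-size check: an entry may serve a request only if it
 * is large enough but no larger than twice the request, except that the
 * smallest persistent entry (4 descs) is always allowed for tiny MRs. */
static int entry_usable(unsigned long long entry_ndescs,
			unsigned long long req_ndescs)
{
	unsigned long long limit = 2 * req_ndescs;

	if (limit < 4)
		limit = 4;
	return entry_ndescs >= req_ndescs && entry_ndescs <= limit;
}

int main(void)
{
	printf("%d\n", entry_usable(4, 1));     /* 1: smallest entry still OK */
	printf("%d\n", entry_usable(16, 10));   /* 1: within twice the size */
	printf("%d\n", entry_usable(256, 10));  /* 0: over-sized, rejected */
	return 0;
}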
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -48,6 +48,7 @@ enum {
-       MAX_PENDING_REG_MR = 8,
- };
-+#define MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS 4
- #define MLX5_UMR_ALIGN 2048
- static void
-@@ -656,6 +657,7 @@ mkey_cache_ent_from_rb_key(struct mlx5_i
- {
-       struct rb_node *node = dev->cache.rb_root.rb_node;
-       struct mlx5_cache_ent *cur, *smallest = NULL;
-+      u64 ndescs_limit;
-       int cmp;
-       /*
-@@ -674,10 +676,18 @@ mkey_cache_ent_from_rb_key(struct mlx5_i
-                       return cur;
-       }
-+      /*
-+       * Limit the usage of mkeys larger than twice the required size while
-+       * also allowing the usage of smallest cache entry for small MRs.
-+       */
-+      ndescs_limit = max_t(u64, rb_key.ndescs * 2,
-+                           MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS);
-+
-       return (smallest &&
-               smallest->rb_key.access_mode == rb_key.access_mode &&
-               smallest->rb_key.access_flags == rb_key.access_flags &&
--              smallest->rb_key.ats == rb_key.ats) ?
-+              smallest->rb_key.ats == rb_key.ats &&
-+              smallest->rb_key.ndescs <= ndescs_limit) ?
-                      smallest :
-                      NULL;
- }
-@@ -958,7 +968,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_
-       mlx5_mkey_cache_debugfs_init(dev);
-       mutex_lock(&cache->rb_lock);
-       for (i = 0; i <= mkey_cache_max_order(dev); i++) {
--              rb_key.ndescs = 1 << (i + 2);
-+              rb_key.ndescs = MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS << i;
-               ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
-               if (IS_ERR(ent)) {
-                       ret = PTR_ERR(ent);
diff --git a/queue-6.1/rdma-mlx5-reduce-qp-table-exposure.patch b/queue-6.1/rdma-mlx5-reduce-qp-table-exposure.patch
deleted file mode 100644 (file)
index 8abe91b..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-From 31e1b4f44049773843852197aab66262fea5d3ca Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Mon, 5 Jun 2023 13:14:05 +0300
-Subject: RDMA/mlx5: Reduce QP table exposure
-
-From: Leon Romanovsky <leonro@nvidia.com>
-
-[ Upstream commit 2ecfd946169e7f56534db2a5f6935858be3005ba ]
-
-driver.h is a common header for the whole mlx5 code base, but struct
-mlx5_qp_table is used only in the mlx5_ib driver. So move that struct
-to be under the sole responsibility of mlx5_ib.
-
-Link: https://lore.kernel.org/r/bec0dc1158e795813b135d1143147977f26bf668.1685953497.git.leon@kernel.org
-Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
-Stable-dep-of: c534ffda781f ("RDMA/mlx5: Fix AH static rate parsing")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/infiniband/hw/mlx5/mlx5_ib.h |  1 +
- drivers/infiniband/hw/mlx5/qp.h      | 11 ++++++++++-
- include/linux/mlx5/driver.h          |  9 ---------
- 3 files changed, 11 insertions(+), 10 deletions(-)
-
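A single-file sketch of the header-hygiene idea described above: the shared header only needs an opaque forward declaration, while the private header owns the full definition. The names and fields below are placeholders, not the driver's:

#include <stdio.h>

/* --- what the common header would expose --- */
struct qp_table;                        /* forward declaration only */
struct core_dev {
	struct qp_table *qps;           /* a pointer is enough here */
};

/* --- what the private qp.h would define --- */
struct qp_table {
	int nqps;                       /* stand-in for lock/tree/notifier */
};

int main(void)
{
	struct qp_table table = { .nqps = 0 };
	struct core_dev dev = { .qps = &table };

	printf("%d\n", dev.qps->nqps);
	return 0;
}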
-diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-index 024d2071c6a5d..5c533023a51a4 100644
---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
-+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-@@ -25,6 +25,7 @@
- #include <rdma/mlx5_user_ioctl_verbs.h>
- #include "srq.h"
-+#include "qp.h"
- #define mlx5_ib_dbg(_dev, format, arg...)                                      \
-       dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__,      \
-diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h
-index fb2f4e030bb8f..e677fa0ca4226 100644
---- a/drivers/infiniband/hw/mlx5/qp.h
-+++ b/drivers/infiniband/hw/mlx5/qp.h
-@@ -6,7 +6,16 @@
- #ifndef _MLX5_IB_QP_H
- #define _MLX5_IB_QP_H
--#include "mlx5_ib.h"
-+struct mlx5_ib_dev;
-+
-+struct mlx5_qp_table {
-+      struct notifier_block nb;
-+
-+      /* protect radix tree
-+       */
-+      spinlock_t lock;
-+      struct radix_tree_root tree;
-+};
- int mlx5_init_qp_table(struct mlx5_ib_dev *dev);
- void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev);
-diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
-index 6cea62ca76d6b..060610183fdf9 100644
---- a/include/linux/mlx5/driver.h
-+++ b/include/linux/mlx5/driver.h
-@@ -440,15 +440,6 @@ struct mlx5_core_health {
-       struct delayed_work             update_fw_log_ts_work;
- };
--struct mlx5_qp_table {
--      struct notifier_block   nb;
--
--      /* protect radix tree
--       */
--      spinlock_t              lock;
--      struct radix_tree_root  tree;
--};
--
- enum {
-       MLX5_PF_NOTIFY_DISABLE_VF,
-       MLX5_PF_NOTIFY_ENABLE_VF,
--- 
-2.39.5
-
diff --git a/queue-6.1/rdma-mlx5-remove-extra-unlock-on-error-path.patch b/queue-6.1/rdma-mlx5-remove-extra-unlock-on-error-path.patch
deleted file mode 100644 (file)
index 4961a41..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-From c1eb2512596fb3542357bb6c34c286f5e0374538 Mon Sep 17 00:00:00 2001
-From: Jason Gunthorpe <jgg@nvidia.com>
-Date: Tue, 28 May 2024 15:52:52 +0300
-Subject: RDMA/mlx5: Remove extra unlock on error path
-
-From: Jason Gunthorpe <jgg@nvidia.com>
-
-commit c1eb2512596fb3542357bb6c34c286f5e0374538 upstream.
-
-The commit below lifted the locking out of this function but left this
-error-path unlock behind, resulting in unbalanced locking. Remove the
-missed unlock too.
-
-Cc: stable@vger.kernel.org
-Fixes: 627122280c87 ("RDMA/mlx5: Add work to remove temporary entries from the cache")
-Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
-Link: https://lore.kernel.org/r/78090c210c750f47219b95248f9f782f34548bb1.1716900410.git.leon@kernel.org
-Signed-off-by: Leon Romanovsky <leon@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/hw/mlx5/mr.c |    4 +---
- 1 file changed, 1 insertion(+), 3 deletions(-)
-
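A minimal sketch of the locking rule this fix restores, using a pthread mutex as a stand-in: once the caller owns lock/unlock, a helper called with the lock held must not drop it on its error path, so the caller's single unlock stays balanced. Names and values are illustrative:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Called with 'lock' held; the caller is responsible for unlocking.
 * Unlocking here on the error path (as the removed line did) would make
 * the caller's later unlock unbalanced. */
static int insert_locked(int key, int existing)
{
	if (key == existing)
		return -1;              /* -EEXIST; do NOT unlock here */
	/* ... insert the node ... */
	return 0;
}

int main(void)
{
	int err;

	pthread_mutex_lock(&lock);
	err = insert_locked(5, 5);
	pthread_mutex_unlock(&lock);    /* single unlock on both paths */
	printf("err=%d\n", err);
	return 0;
}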
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -638,10 +638,8 @@ static int mlx5_cache_ent_insert(struct
-                       new = &((*new)->rb_left);
-               if (cmp < 0)
-                       new = &((*new)->rb_right);
--              if (cmp == 0) {
--                      mutex_unlock(&cache->rb_lock);
-+              if (cmp == 0)
-                       return -EEXIST;
--              }
-       }
-       /* Add new node and rebalance tree. */
diff --git a/queue-6.1/rdma-mlx5-remove-implicit-odp-cache-entry.patch b/queue-6.1/rdma-mlx5-remove-implicit-odp-cache-entry.patch
deleted file mode 100644 (file)
index 9ee1e9e..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-From f1cf3c129548533fa9dc9569a22ff1ed3e3c9e02 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Thu, 26 Jan 2023 00:28:03 +0200
-Subject: RDMA/mlx5: Remove implicit ODP cache entry
-
-From: Aharon Landau <aharonl@nvidia.com>
-
-[ Upstream commit 18b1746bddf5e7f6b2618966596d9517172a5cd7 ]
-
-The implicit ODP mkey doesn't have unique properties. It shares the same
-properties with the order-18 cache entry, so there is no need to devote a
-special entry to it.
-
-Link: https://lore.kernel.org/r/20230125222807.6921-3-michaelgur@nvidia.com
-Signed-off-by: Aharon Landau <aharonl@nvidia.com>
-Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-Stable-dep-of: d97505baea64 ("RDMA/mlx5: Fix the recovery flow of the UMR QP")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/infiniband/hw/mlx5/odp.c | 20 +++++---------------
- include/linux/mlx5/driver.h      |  1 -
- 2 files changed, 5 insertions(+), 16 deletions(-)
-
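The "order" referred to above is just the ceiling log2 of the entry size. A rough userspace model of that computation (not the kernel's actual order_base_2() implementation, and with illustrative values rather than the driver's constants):

#include <stdio.h>

/* Smallest order such that (1 << order) >= n, i.e. a ceiling log2. */
static unsigned int order_of(unsigned long n)
{
	unsigned int order = 0;

	while ((1UL << order) < n)
		order++;
	return order;
}

int main(void)
{
	printf("%u\n", order_of(512));  /* 9 */
	printf("%u\n", order_of(500));  /* 9: rounds up to the next power of 2 */
	return 0;
}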
-diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
-index a5c9baec8be85..5f0a17382de73 100644
---- a/drivers/infiniband/hw/mlx5/odp.c
-+++ b/drivers/infiniband/hw/mlx5/odp.c
-@@ -406,6 +406,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
- static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
-                                               unsigned long idx)
- {
-+      int order = order_base_2(MLX5_IMR_MTT_ENTRIES);
-       struct mlx5_ib_dev *dev = mr_to_mdev(imr);
-       struct ib_umem_odp *odp;
-       struct mlx5_ib_mr *mr;
-@@ -418,7 +419,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
-       if (IS_ERR(odp))
-               return ERR_CAST(odp);
--      mr = mlx5_mr_cache_alloc(dev, &dev->cache.ent[MLX5_IMR_MTT_CACHE_ENTRY],
-+      BUILD_BUG_ON(order > MKEY_CACHE_LAST_STD_ENTRY);
-+      mr = mlx5_mr_cache_alloc(dev, &dev->cache.ent[order],
-                                imr->access_flags);
-       if (IS_ERR(mr)) {
-               ib_umem_odp_release(odp);
-@@ -1595,20 +1597,8 @@ void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent)
- {
-       if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
-               return;
--
--      switch (ent->order - 2) {
--      case MLX5_IMR_MTT_CACHE_ENTRY:
--              ent->ndescs = MLX5_IMR_MTT_ENTRIES;
--              ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
--              ent->limit = 0;
--              break;
--
--      case MLX5_IMR_KSM_CACHE_ENTRY:
--              ent->ndescs = mlx5_imr_ksm_entries;
--              ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
--              ent->limit = 0;
--              break;
--      }
-+      ent->ndescs = mlx5_imr_ksm_entries;
-+      ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
- }
- static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
-diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
-index 3c3e0f26c2446..6cea62ca76d6b 100644
---- a/include/linux/mlx5/driver.h
-+++ b/include/linux/mlx5/driver.h
-@@ -744,7 +744,6 @@ enum {
- enum {
-       MKEY_CACHE_LAST_STD_ENTRY = 20,
--      MLX5_IMR_MTT_CACHE_ENTRY,
-       MLX5_IMR_KSM_CACHE_ENTRY,
-       MAX_MKEY_CACHE_ENTRIES
- };
--- 
-2.39.5
-
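As the changelog above notes, an implicit ODP child MR covers MLX5_IMR_MTT_ENTRIES translations and therefore fits the standard cache entry of that order (order 18 with 4 KiB pages), so no dedicated MLX5_IMR_MTT_CACHE_ENTRY is needed. A small stand-alone C sketch of the order calculation follows; the 1 << 18 value of MLX5_IMR_MTT_ENTRIES is an assumption for 4 KiB pages, and the helper is a user-space stand-in for the kernel's order_base_2():

#include <stdio.h>

/*
 * Assumed value for 4 KiB pages; the real constant lives in
 * include/linux/mlx5/driver.h.
 */
#define MLX5_IMR_MTT_ENTRIES (1UL << 18)

/* User-space stand-in for the kernel's order_base_2(), i.e. ceil(log2(n)). */
static unsigned int order_base_2(unsigned long n)
{
        unsigned int order = 0;

        while ((1UL << order) < n)
                order++;
        return order;
}

int main(void)
{
        /* Matches the "order 18 cache entry" mentioned in the changelog. */
        printf("implicit ODP child MR maps to cache entry order %u\n",
               order_base_2(MLX5_IMR_MTT_ENTRIES));
        return 0;
}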
diff --git a/queue-6.1/rdma-mlx5-remove-not-used-cache-disable-flag.patch b/queue-6.1/rdma-mlx5-remove-not-used-cache-disable-flag.patch
deleted file mode 100644
index 0a384b9..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-From c99a7457e5bb873914a74307ba2df85f6799203b Mon Sep 17 00:00:00 2001
-From: Leon Romanovsky <leonro@nvidia.com>
-Date: Thu, 28 Sep 2023 20:20:47 +0300
-Subject: RDMA/mlx5: Remove not-used cache disable flag
-
-From: Leon Romanovsky <leonro@nvidia.com>
-
-commit c99a7457e5bb873914a74307ba2df85f6799203b upstream.
-
-During execution of mlx5_mkey_cache_cleanup(), there is a guarantee
-that MR are not registered and/or destroyed. It means that we don't
-need newly introduced cache disable flag.
-
-Fixes: 374012b00457 ("RDMA/mlx5: Fix mkey cache possible deadlock on cleanup")
-Link: https://lore.kernel.org/r/c7e9c9f98c8ae4a7413d97d9349b29f5b0a23dbe.1695921626.git.leon@kernel.org
-Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/hw/mlx5/mlx5_ib.h |    1 -
- drivers/infiniband/hw/mlx5/mr.c      |    5 -----
- 2 files changed, 6 deletions(-)
-
---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
-+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-@@ -822,7 +822,6 @@ struct mlx5_mkey_cache {
-       struct dentry           *fs_root;
-       unsigned long           last_add;
-       struct delayed_work     remove_ent_dwork;
--      u8                      disable: 1;
- };
- struct mlx5_ib_port_resources {
---- a/drivers/infiniband/hw/mlx5/mr.c
-+++ b/drivers/infiniband/hw/mlx5/mr.c
-@@ -1007,7 +1007,6 @@ int mlx5_mkey_cache_cleanup(struct mlx5_
-               return 0;
-       mutex_lock(&dev->cache.rb_lock);
--      dev->cache.disable = true;
-       for (node = rb_first(root); node; node = rb_next(node)) {
-               ent = rb_entry(node, struct mlx5_cache_ent, node);
-               spin_lock_irq(&ent->mkeys_queue.lock);
-@@ -1810,10 +1809,6 @@ static int cache_ent_find_and_store(stru
-       }
-       mutex_lock(&cache->rb_lock);
--      if (cache->disable) {
--              mutex_unlock(&cache->rb_lock);
--              return 0;
--      }
-       ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key);
-       if (ent) {
-               if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) {
diff --git a/queue-6.1/rdma-mlx5-uncacheable-mkey-has-neither-rb_key-or-cache_ent.patch b/queue-6.1/rdma-mlx5-uncacheable-mkey-has-neither-rb_key-or-cache_ent.patch
deleted file mode 100644
index 4778718..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-From 0611a8e8b475fc5230b9a24d29c8397aaab20b63 Mon Sep 17 00:00:00 2001
-From: Or Har-Toov <ohartoov@nvidia.com>
-Date: Wed, 3 Apr 2024 13:35:59 +0300
-Subject: RDMA/mlx5: Uncacheable mkey has neither rb_key or cache_ent
-
-From: Or Har-Toov <ohartoov@nvidia.com>
-
-commit 0611a8e8b475fc5230b9a24d29c8397aaab20b63 upstream.
-
-As some mkeys can't be modified with UMR due to some UMR limitations,
-like the size of translation that can be updated, not all user mkeys can
-be cached.
-
-Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
-Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
-Link: https://lore.kernel.org/r/f2742dd934ed73b2d32c66afb8e91b823063880c.1712140377.git.leon@kernel.org
-Signed-off-by: Leon Romanovsky <leon@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/infiniband/hw/mlx5/mlx5_ib.h |    2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
-+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
-@@ -651,7 +651,7 @@ struct mlx5_ib_mkey {
-       unsigned int ndescs;
-       struct wait_queue_head wait;
-       refcount_t usecount;
--      /* User Mkey must hold either a rb_key or a cache_ent. */
-+      /* Cacheable user Mkey must hold either a rb_key or a cache_ent. */
-       struct mlx5r_cache_rb_key rb_key;
-       struct mlx5_cache_ent *cache_ent;
- };
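The changelog above explains that mkeys whose translation cannot be updated through UMR (for example because it is too large) must not be cached, which is why the comment now applies only to cacheable user mkeys. A purely hypothetical sketch of that kind of check follows; the predicate name and the limit are invented for illustration and are not the driver's actual logic:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Made-up limit, only to illustrate "too large to update via UMR". */
#define ASSUMED_MAX_UMR_DESCS (1UL << 16)

/*
 * Hypothetical cacheability check of the kind the changelog describes:
 * an mkey whose translation exceeds what UMR can rewrite is not reused
 * from the cache, so it carries neither an rb_key nor a cache_ent.
 */
static bool mkey_is_cacheable(size_t ndescs)
{
        return ndescs <= ASSUMED_MAX_UMR_DESCS;
}

int main(void)
{
        printf("small MR cacheable: %d\n", mkey_is_cacheable(512));
        printf("huge MR cacheable:  %d\n", mkey_is_cacheable(1UL << 20));
        return 0;
}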
diff --git a/queue-6.1/series b/queue-6.1/series
index 3dfeae4dfffaf5428082dd71864249766bf6c3a9..f91c976f30cd3bb790796f651ebe4b0fc9b07b6e 100644
@@ -91,22 +91,10 @@ media-mediatek-vcodec-fix-h264-multi-stateless-decoder-smatch-warning.patch
 spi-atmel-quadspi-avoid-overwriting-delay-register-settings.patch
 spi-atmel-quadspi-fix-wrong-register-value-written-to-mr.patch
 netfilter-allow-exp-not-to-be-removed-in-nf_ct_find_expectation.patch
-rdma-mlx5-don-t-keep-umrable-page_shift-in-cache-ent.patch
-rdma-mlx5-remove-implicit-odp-cache-entry.patch
-rdma-mlx5-change-the-cache-structure-to-an-rb-tree.patch
-rdma-mlx5-introduce-mlx5r_cache_rb_key.patch
-rdma-mlx5-cache-all-user-cacheable-mkeys-on-dereg-mr.patch
-rdma-mlx5-add-work-to-remove-temporary-entries-from-.patch
-rdma-mlx5-implement-mkeys-management-via-lifo-queue.patch
-rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch
 ib-mlx5-set-and-get-correct-qp_num-for-a-dct-qp.patch
 ovl-fix-uaf-in-ovl_dentry_update_reval-by-moving-dpu.patch
 sunrpc-convert-rpc_task_-constants-to-enum.patch
 sunrpc-prevent-looping-due-to-rpc_signal_task-races.patch
-rdma-mlx-calling-qp-event-handler-in-workqueue-conte.patch
-rdma-mlx5-reduce-qp-table-exposure.patch
-ib-core-add-support-for-xdr-link-speed.patch
-rdma-mlx5-fix-ah-static-rate-parsing.patch
 scsi-core-clear-driver-private-data-when-retrying-re.patch
 rdma-mlx5-fix-bind-qp-error-cleanup-flow.patch
 sunrpc-suppress-warnings-for-unused-procfs-functions.patch
@@ -171,17 +159,3 @@ mm-memory-use-exception-ip-to-search-exception-tables.patch
 squashfs-check-the-inode-number-is-not-the-invalid-value-of-zero.patch
 pfifo_tail_enqueue-drop-new-packet-when-sch-limit-0.patch
 media-mtk-vcodec-potential-null-pointer-deference-in-scp.patch
-rdma-mlx5-fix-mr-cache-debugfs-error-in-ib-representors-mode.patch
-rdma-mlx5-check-reg_create-create-for-errors.patch
-rdma-mlx5-fix-mkey-cache-possible-deadlock-on-cleanup.patch
-rdma-mlx5-fix-assigning-access-flags-to-cache-mkeys.patch
-rdma-mlx5-uncacheable-mkey-has-neither-rb_key-or-cache_ent.patch
-rdma-mlx5-change-check-for-cacheable-mkeys.patch
-rdma-mlx5-remove-extra-unlock-on-error-path.patch
-rdma-mlx5-follow-rb_key.ats-when-creating-new-mkeys.patch
-rdma-mlx5-ensure-created-mkeys-always-have-a-populated-rb_key.patch
-rdma-mlx5-fix-counter-update-on-mr-cache-mkey-creation.patch
-rdma-mlx5-limit-usage-of-over-sized-mkeys-from-the-mr-cache.patch
-rdma-mlx5-remove-not-used-cache-disable-flag.patch
-rdma-mlx5-fix-mkey-cache-wq-flush.patch
-rdma-mlx5-fix-mr-cache-temp-entries-cleanup.patch