From: Greg Kroah-Hartman Date: Thu, 6 Mar 2025 14:29:56 +0000 (+0100) Subject: drop a bunch of rdma patches from 6.1 X-Git-Tag: v6.6.81~1 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a8f5f65064b868cbde7677192261292c144baa87;p=thirdparty%2Fkernel%2Fstable-queue.git drop a bunch of rdma patches from 6.1 Not all were in 6.6 and the fixes for the fixes were getting just too deep and messy to make it worth it. --- diff --git a/queue-6.1/ib-core-add-support-for-xdr-link-speed.patch b/queue-6.1/ib-core-add-support-for-xdr-link-speed.patch deleted file mode 100644 index 597def3d98..0000000000 --- a/queue-6.1/ib-core-add-support-for-xdr-link-speed.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 79cad3705d28ff0c133bcd85a9107d0dbbb27e72 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Wed, 20 Sep 2023 13:07:40 +0300 -Subject: IB/core: Add support for XDR link speed - -From: Or Har-Toov - -[ Upstream commit 703289ce43f740b0096724300107df82d008552f ] - -Add new IBTA speed XDR, the new rate that was added to Infiniband spec -as part of XDR and supporting signaling rate of 200Gb. - -In order to report that value to rdma-core, add new u32 field to -query_port response. - -Signed-off-by: Or Har-Toov -Reviewed-by: Mark Zhang -Link: https://lore.kernel.org/r/9d235fc600a999e8274010f0e18b40fa60540e6c.1695204156.git.leon@kernel.org -Reviewed-by: Jacob Keller -Signed-off-by: Leon Romanovsky -Stable-dep-of: c534ffda781f ("RDMA/mlx5: Fix AH static rate parsing") -Signed-off-by: Sasha Levin ---- - drivers/infiniband/core/sysfs.c | 4 ++++ - drivers/infiniband/core/uverbs_std_types_device.c | 3 ++- - drivers/infiniband/core/verbs.c | 3 +++ - include/rdma/ib_verbs.h | 2 ++ - include/uapi/rdma/ib_user_ioctl_verbs.h | 3 ++- - 5 files changed, 13 insertions(+), 2 deletions(-) - -diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c -index ec5efdc166601..9f97bef021497 100644 ---- a/drivers/infiniband/core/sysfs.c -+++ b/drivers/infiniband/core/sysfs.c -@@ -342,6 +342,10 @@ static ssize_t rate_show(struct ib_device *ibdev, u32 port_num, - speed = " NDR"; - rate = 1000; - break; -+ case IB_SPEED_XDR: -+ speed = " XDR"; -+ rate = 2000; -+ break; - case IB_SPEED_SDR: - default: /* default to SDR for invalid rates */ - speed = " SDR"; -diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c -index 049684880ae03..fb0555647336f 100644 ---- a/drivers/infiniband/core/uverbs_std_types_device.c -+++ b/drivers/infiniband/core/uverbs_std_types_device.c -@@ -203,6 +203,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)( - - copy_port_attr_to_resp(&attr, &resp.legacy_resp, ib_dev, port_num); - resp.port_cap_flags2 = attr.port_cap_flags2; -+ resp.active_speed_ex = attr.active_speed; - - return uverbs_copy_to_struct_or_zero(attrs, UVERBS_ATTR_QUERY_PORT_RESP, - &resp, sizeof(resp)); -@@ -461,7 +462,7 @@ DECLARE_UVERBS_NAMED_METHOD( - UVERBS_ATTR_PTR_OUT( - UVERBS_ATTR_QUERY_PORT_RESP, - UVERBS_ATTR_STRUCT(struct ib_uverbs_query_port_resp_ex, -- reserved), -+ active_speed_ex), - UA_MANDATORY)); - - DECLARE_UVERBS_NAMED_METHOD( -diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c -index b99b3cc283b65..90848546f1704 100644 ---- a/drivers/infiniband/core/verbs.c -+++ b/drivers/infiniband/core/verbs.c -@@ -147,6 +147,7 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate) - case IB_RATE_50_GBPS: return 20; - case IB_RATE_400_GBPS: return 160; - case IB_RATE_600_GBPS: return 240; -+ case IB_RATE_800_GBPS: return 
320; - default: return -1; - } - } -@@ -176,6 +177,7 @@ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult) - case 20: return IB_RATE_50_GBPS; - case 160: return IB_RATE_400_GBPS; - case 240: return IB_RATE_600_GBPS; -+ case 320: return IB_RATE_800_GBPS; - default: return IB_RATE_PORT_CURRENT; - } - } -@@ -205,6 +207,7 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate) - case IB_RATE_50_GBPS: return 53125; - case IB_RATE_400_GBPS: return 425000; - case IB_RATE_600_GBPS: return 637500; -+ case IB_RATE_800_GBPS: return 850000; - default: return -1; - } - } -diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h -index 68fd6d22adfd4..750effb875783 100644 ---- a/include/rdma/ib_verbs.h -+++ b/include/rdma/ib_verbs.h -@@ -557,6 +557,7 @@ enum ib_port_speed { - IB_SPEED_EDR = 32, - IB_SPEED_HDR = 64, - IB_SPEED_NDR = 128, -+ IB_SPEED_XDR = 256, - }; - - enum ib_stat_flag { -@@ -836,6 +837,7 @@ enum ib_rate { - IB_RATE_50_GBPS = 20, - IB_RATE_400_GBPS = 21, - IB_RATE_600_GBPS = 22, -+ IB_RATE_800_GBPS = 23, - }; - - /** -diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h -index 7dd56210226f5..125fb9f0ef4ab 100644 ---- a/include/uapi/rdma/ib_user_ioctl_verbs.h -+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h -@@ -218,7 +218,8 @@ enum ib_uverbs_advise_mr_flag { - struct ib_uverbs_query_port_resp_ex { - struct ib_uverbs_query_port_resp legacy_resp; - __u16 port_cap_flags2; -- __u8 reserved[6]; -+ __u8 reserved[2]; -+ __u32 active_speed_ex; - }; - - struct ib_uverbs_qp_cap { --- -2.39.5 - diff --git a/queue-6.1/rdma-mlx-calling-qp-event-handler-in-workqueue-conte.patch b/queue-6.1/rdma-mlx-calling-qp-event-handler-in-workqueue-conte.patch deleted file mode 100644 index 70a6ed3b22..0000000000 --- a/queue-6.1/rdma-mlx-calling-qp-event-handler-in-workqueue-conte.patch +++ /dev/null @@ -1,502 +0,0 @@ -From d5eccf1fd4fbdb90e3f1aba4e5ba5928ea3163c2 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Wed, 4 Jan 2023 11:43:34 +0200 -Subject: RDMA/mlx: Calling qp event handler in workqueue context - -From: Mark Zhang - -[ Upstream commit 312b8f79eb05479628ee71357749815b2eeeeea8 ] - -Move the call of qp event handler from atomic to workqueue context, -so that the handler is able to block. This is needed by following -patches. 
- -Signed-off-by: Mark Zhang -Reviewed-by: Patrisious Haddad -Link: https://lore.kernel.org/r/0cd17b8331e445f03942f4bb28d447f24ac5669d.1672821186.git.leonro@nvidia.com -Signed-off-by: Leon Romanovsky -Stable-dep-of: c534ffda781f ("RDMA/mlx5: Fix AH static rate parsing") -Signed-off-by: Sasha Levin ---- - drivers/infiniband/hw/mlx4/main.c | 8 ++ - drivers/infiniband/hw/mlx4/mlx4_ib.h | 3 + - drivers/infiniband/hw/mlx4/qp.c | 121 +++++++++++++++++------- - drivers/infiniband/hw/mlx5/main.c | 7 ++ - drivers/infiniband/hw/mlx5/qp.c | 119 ++++++++++++++++------- - drivers/infiniband/hw/mlx5/qp.h | 2 + - drivers/infiniband/hw/mlx5/qpc.c | 3 +- - drivers/net/ethernet/mellanox/mlx4/qp.c | 14 ++- - include/linux/mlx4/qp.h | 1 + - include/rdma/ib_verbs.h | 2 +- - 10 files changed, 202 insertions(+), 78 deletions(-) - -diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c -index 7c3dc86ab7f04..0f0b130cc8aac 100644 ---- a/drivers/infiniband/hw/mlx4/main.c -+++ b/drivers/infiniband/hw/mlx4/main.c -@@ -3307,6 +3307,10 @@ static int __init mlx4_ib_init(void) - if (!wq) - return -ENOMEM; - -+ err = mlx4_ib_qp_event_init(); -+ if (err) -+ goto clean_qp_event; -+ - err = mlx4_ib_cm_init(); - if (err) - goto clean_wq; -@@ -3328,6 +3332,9 @@ static int __init mlx4_ib_init(void) - mlx4_ib_cm_destroy(); - - clean_wq: -+ mlx4_ib_qp_event_cleanup(); -+ -+clean_qp_event: - destroy_workqueue(wq); - return err; - } -@@ -3337,6 +3344,7 @@ static void __exit mlx4_ib_cleanup(void) - mlx4_unregister_interface(&mlx4_ib_interface); - mlx4_ib_mcg_destroy(); - mlx4_ib_cm_destroy(); -+ mlx4_ib_qp_event_cleanup(); - destroy_workqueue(wq); - } - -diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h -index 6a3b0f121045e..17fee1e73a45a 100644 ---- a/drivers/infiniband/hw/mlx4/mlx4_ib.h -+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h -@@ -940,4 +940,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, - int mlx4_ib_cm_init(void); - void mlx4_ib_cm_destroy(void); - -+int mlx4_ib_qp_event_init(void); -+void mlx4_ib_qp_event_cleanup(void); -+ - #endif /* MLX4_IB_H */ -diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c -index ac479e81ddee8..9d08aa99f3cb0 100644 ---- a/drivers/infiniband/hw/mlx4/qp.c -+++ b/drivers/infiniband/hw/mlx4/qp.c -@@ -102,6 +102,14 @@ enum mlx4_ib_source_type { - MLX4_IB_RWQ_SRC = 1, - }; - -+struct mlx4_ib_qp_event_work { -+ struct work_struct work; -+ struct mlx4_qp *qp; -+ enum mlx4_event type; -+}; -+ -+static struct workqueue_struct *mlx4_ib_qp_event_wq; -+ - static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) - { - if (!mlx4_is_master(dev->dev)) -@@ -200,50 +208,77 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n) - } - } - -+static void mlx4_ib_handle_qp_event(struct work_struct *_work) -+{ -+ struct mlx4_ib_qp_event_work *qpe_work = -+ container_of(_work, struct mlx4_ib_qp_event_work, work); -+ struct ib_qp *ibqp = &to_mibqp(qpe_work->qp)->ibqp; -+ struct ib_event event = {}; -+ -+ event.device = ibqp->device; -+ event.element.qp = ibqp; -+ -+ switch (qpe_work->type) { -+ case MLX4_EVENT_TYPE_PATH_MIG: -+ event.event = IB_EVENT_PATH_MIG; -+ break; -+ case MLX4_EVENT_TYPE_COMM_EST: -+ event.event = IB_EVENT_COMM_EST; -+ break; -+ case MLX4_EVENT_TYPE_SQ_DRAINED: -+ event.event = IB_EVENT_SQ_DRAINED; -+ break; -+ case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE: -+ event.event = IB_EVENT_QP_LAST_WQE_REACHED; -+ break; -+ case MLX4_EVENT_TYPE_WQ_CATAS_ERROR: -+ event.event 
= IB_EVENT_QP_FATAL; -+ break; -+ case MLX4_EVENT_TYPE_PATH_MIG_FAILED: -+ event.event = IB_EVENT_PATH_MIG_ERR; -+ break; -+ case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR: -+ event.event = IB_EVENT_QP_REQ_ERR; -+ break; -+ case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR: -+ event.event = IB_EVENT_QP_ACCESS_ERR; -+ break; -+ default: -+ pr_warn("Unexpected event type %d on QP %06x\n", -+ qpe_work->type, qpe_work->qp->qpn); -+ goto out; -+ } -+ -+ ibqp->event_handler(&event, ibqp->qp_context); -+ -+out: -+ mlx4_put_qp(qpe_work->qp); -+ kfree(qpe_work); -+} -+ - static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) - { -- struct ib_event event; - struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; -+ struct mlx4_ib_qp_event_work *qpe_work; - - if (type == MLX4_EVENT_TYPE_PATH_MIG) - to_mibqp(qp)->port = to_mibqp(qp)->alt_port; - -- if (ibqp->event_handler) { -- event.device = ibqp->device; -- event.element.qp = ibqp; -- switch (type) { -- case MLX4_EVENT_TYPE_PATH_MIG: -- event.event = IB_EVENT_PATH_MIG; -- break; -- case MLX4_EVENT_TYPE_COMM_EST: -- event.event = IB_EVENT_COMM_EST; -- break; -- case MLX4_EVENT_TYPE_SQ_DRAINED: -- event.event = IB_EVENT_SQ_DRAINED; -- break; -- case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE: -- event.event = IB_EVENT_QP_LAST_WQE_REACHED; -- break; -- case MLX4_EVENT_TYPE_WQ_CATAS_ERROR: -- event.event = IB_EVENT_QP_FATAL; -- break; -- case MLX4_EVENT_TYPE_PATH_MIG_FAILED: -- event.event = IB_EVENT_PATH_MIG_ERR; -- break; -- case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR: -- event.event = IB_EVENT_QP_REQ_ERR; -- break; -- case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR: -- event.event = IB_EVENT_QP_ACCESS_ERR; -- break; -- default: -- pr_warn("Unexpected event type %d " -- "on QP %06x\n", type, qp->qpn); -- return; -- } -+ if (!ibqp->event_handler) -+ goto out_no_handler; - -- ibqp->event_handler(&event, ibqp->qp_context); -- } -+ qpe_work = kzalloc(sizeof(*qpe_work), GFP_ATOMIC); -+ if (!qpe_work) -+ goto out_no_handler; -+ -+ qpe_work->qp = qp; -+ qpe_work->type = type; -+ INIT_WORK(&qpe_work->work, mlx4_ib_handle_qp_event); -+ queue_work(mlx4_ib_qp_event_wq, &qpe_work->work); -+ return; -+ -+out_no_handler: -+ mlx4_put_qp(qp); - } - - static void mlx4_ib_wq_event(struct mlx4_qp *qp, enum mlx4_event type) -@@ -4472,3 +4507,17 @@ void mlx4_ib_drain_rq(struct ib_qp *qp) - - handle_drain_completion(cq, &rdrain, dev); - } -+ -+int mlx4_ib_qp_event_init(void) -+{ -+ mlx4_ib_qp_event_wq = alloc_ordered_workqueue("mlx4_ib_qp_event_wq", 0); -+ if (!mlx4_ib_qp_event_wq) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void mlx4_ib_qp_event_cleanup(void) -+{ -+ destroy_workqueue(mlx4_ib_qp_event_wq); -+} -diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c -index 45a414e8d35fa..a22649617e017 100644 ---- a/drivers/infiniband/hw/mlx5/main.c -+++ b/drivers/infiniband/hw/mlx5/main.c -@@ -4410,6 +4410,10 @@ static int __init mlx5_ib_init(void) - return -ENOMEM; - } - -+ ret = mlx5_ib_qp_event_init(); -+ if (ret) -+ goto qp_event_err; -+ - mlx5_ib_odp_init(); - ret = mlx5r_rep_init(); - if (ret) -@@ -4427,6 +4431,8 @@ static int __init mlx5_ib_init(void) - mp_err: - mlx5r_rep_cleanup(); - rep_err: -+ mlx5_ib_qp_event_cleanup(); -+qp_event_err: - destroy_workqueue(mlx5_ib_event_wq); - free_page((unsigned long)xlt_emergency_page); - return ret; -@@ -4438,6 +4444,7 @@ static void __exit mlx5_ib_cleanup(void) - auxiliary_driver_unregister(&mlx5r_mp_driver); - mlx5r_rep_cleanup(); - -+ mlx5_ib_qp_event_cleanup(); - destroy_workqueue(mlx5_ib_event_wq); - free_page((unsigned 
long)xlt_emergency_page); - } -diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c -index d782a494abcda..43c0123babd10 100644 ---- a/drivers/infiniband/hw/mlx5/qp.c -+++ b/drivers/infiniband/hw/mlx5/qp.c -@@ -71,6 +71,14 @@ struct mlx5_modify_raw_qp_param { - u32 port; - }; - -+struct mlx5_ib_qp_event_work { -+ struct work_struct work; -+ struct mlx5_core_qp *qp; -+ int type; -+}; -+ -+static struct workqueue_struct *mlx5_ib_qp_event_wq; -+ - static void get_cqs(enum ib_qp_type qp_type, - struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq, - struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq); -@@ -302,51 +310,78 @@ int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer, - return mlx5_ib_read_user_wqe_srq(srq, wqe_index, buffer, buflen, bc); - } - -+static void mlx5_ib_handle_qp_event(struct work_struct *_work) -+{ -+ struct mlx5_ib_qp_event_work *qpe_work = -+ container_of(_work, struct mlx5_ib_qp_event_work, work); -+ struct ib_qp *ibqp = &to_mibqp(qpe_work->qp)->ibqp; -+ struct ib_event event = {}; -+ -+ event.device = ibqp->device; -+ event.element.qp = ibqp; -+ switch (qpe_work->type) { -+ case MLX5_EVENT_TYPE_PATH_MIG: -+ event.event = IB_EVENT_PATH_MIG; -+ break; -+ case MLX5_EVENT_TYPE_COMM_EST: -+ event.event = IB_EVENT_COMM_EST; -+ break; -+ case MLX5_EVENT_TYPE_SQ_DRAINED: -+ event.event = IB_EVENT_SQ_DRAINED; -+ break; -+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE: -+ event.event = IB_EVENT_QP_LAST_WQE_REACHED; -+ break; -+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: -+ event.event = IB_EVENT_QP_FATAL; -+ break; -+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED: -+ event.event = IB_EVENT_PATH_MIG_ERR; -+ break; -+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: -+ event.event = IB_EVENT_QP_REQ_ERR; -+ break; -+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: -+ event.event = IB_EVENT_QP_ACCESS_ERR; -+ break; -+ default: -+ pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", -+ qpe_work->type, qpe_work->qp->qpn); -+ goto out; -+ } -+ -+ ibqp->event_handler(&event, ibqp->qp_context); -+ -+out: -+ mlx5_core_res_put(&qpe_work->qp->common); -+ kfree(qpe_work); -+} -+ - static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) - { - struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; -- struct ib_event event; -+ struct mlx5_ib_qp_event_work *qpe_work; - - if (type == MLX5_EVENT_TYPE_PATH_MIG) { - /* This event is only valid for trans_qps */ - to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port; - } - -- if (ibqp->event_handler) { -- event.device = ibqp->device; -- event.element.qp = ibqp; -- switch (type) { -- case MLX5_EVENT_TYPE_PATH_MIG: -- event.event = IB_EVENT_PATH_MIG; -- break; -- case MLX5_EVENT_TYPE_COMM_EST: -- event.event = IB_EVENT_COMM_EST; -- break; -- case MLX5_EVENT_TYPE_SQ_DRAINED: -- event.event = IB_EVENT_SQ_DRAINED; -- break; -- case MLX5_EVENT_TYPE_SRQ_LAST_WQE: -- event.event = IB_EVENT_QP_LAST_WQE_REACHED; -- break; -- case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: -- event.event = IB_EVENT_QP_FATAL; -- break; -- case MLX5_EVENT_TYPE_PATH_MIG_FAILED: -- event.event = IB_EVENT_PATH_MIG_ERR; -- break; -- case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: -- event.event = IB_EVENT_QP_REQ_ERR; -- break; -- case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: -- event.event = IB_EVENT_QP_ACCESS_ERR; -- break; -- default: -- pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn); -- return; -- } -+ if (!ibqp->event_handler) -+ goto out_no_handler; - -- ibqp->event_handler(&event, ibqp->qp_context); -- } -+ qpe_work = kzalloc(sizeof(*qpe_work), GFP_ATOMIC); -+ if 
(!qpe_work) -+ goto out_no_handler; -+ -+ qpe_work->qp = qp; -+ qpe_work->type = type; -+ INIT_WORK(&qpe_work->work, mlx5_ib_handle_qp_event); -+ queue_work(mlx5_ib_qp_event_wq, &qpe_work->work); -+ return; -+ -+out_no_handler: -+ mlx5_core_res_put(&qp->common); - } - - static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, -@@ -5752,3 +5787,17 @@ int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter) - mutex_unlock(&mqp->mutex); - return err; - } -+ -+int mlx5_ib_qp_event_init(void) -+{ -+ mlx5_ib_qp_event_wq = alloc_ordered_workqueue("mlx5_ib_qp_event_wq", 0); -+ if (!mlx5_ib_qp_event_wq) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void mlx5_ib_qp_event_cleanup(void) -+{ -+ destroy_workqueue(mlx5_ib_qp_event_wq); -+} -diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h -index 5d4e140db99ce..fb2f4e030bb8f 100644 ---- a/drivers/infiniband/hw/mlx5/qp.h -+++ b/drivers/infiniband/hw/mlx5/qp.h -@@ -44,4 +44,6 @@ void mlx5_core_res_put(struct mlx5_core_rsc_common *res); - int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn); - int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn); - int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); -+int mlx5_ib_qp_event_init(void); -+void mlx5_ib_qp_event_cleanup(void); - #endif /* _MLX5_IB_QP_H */ -diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c -index d4e7864c56f18..a824ff22f4615 100644 ---- a/drivers/infiniband/hw/mlx5/qpc.c -+++ b/drivers/infiniband/hw/mlx5/qpc.c -@@ -135,7 +135,8 @@ static int rsc_event_notifier(struct notifier_block *nb, - case MLX5_RES_SQ: - qp = (struct mlx5_core_qp *)common; - qp->event(qp, event_type); -- break; -+ /* Need to put resource in event handler */ -+ return NOTIFY_OK; - case MLX5_RES_DCT: - dct = (struct mlx5_core_dct *)common; - if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED) -diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c -index 48cfaa7eaf50c..913ed255990f4 100644 ---- a/drivers/net/ethernet/mellanox/mlx4/qp.c -+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c -@@ -46,6 +46,13 @@ - #define MLX4_BF_QP_SKIP_MASK 0xc0 - #define MLX4_MAX_BF_QP_RANGE 0x40 - -+void mlx4_put_qp(struct mlx4_qp *qp) -+{ -+ if (refcount_dec_and_test(&qp->refcount)) -+ complete(&qp->free); -+} -+EXPORT_SYMBOL_GPL(mlx4_put_qp); -+ - void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type) - { - struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; -@@ -64,10 +71,8 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type) - return; - } - -+ /* Need to call mlx4_put_qp() in event handler */ - qp->event(qp, event_type); -- -- if (refcount_dec_and_test(&qp->refcount)) -- complete(&qp->free); - } - - /* used for INIT/CLOSE port logic */ -@@ -523,8 +528,7 @@ EXPORT_SYMBOL_GPL(mlx4_qp_remove); - - void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp) - { -- if (refcount_dec_and_test(&qp->refcount)) -- complete(&qp->free); -+ mlx4_put_qp(qp); - wait_for_completion(&qp->free); - - mlx4_qp_free_icm(dev, qp->qpn); -diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h -index b6b626157b03a..b9a7b1319f5d3 100644 ---- a/include/linux/mlx4/qp.h -+++ b/include/linux/mlx4/qp.h -@@ -504,4 +504,5 @@ static inline u16 folded_qp(u32 q) - - u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn); - -+void mlx4_put_qp(struct mlx4_qp *qp); - #endif /* MLX4_QP_H */ -diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h -index 
5582509003264..68fd6d22adfd4 100644 ---- a/include/rdma/ib_verbs.h -+++ b/include/rdma/ib_verbs.h -@@ -1162,7 +1162,7 @@ enum ib_qp_create_flags { - */ - - struct ib_qp_init_attr { -- /* Consumer's event_handler callback must not block */ -+ /* This callback occurs in workqueue context */ - void (*event_handler)(struct ib_event *, void *); - - void *qp_context; --- -2.39.5 - diff --git a/queue-6.1/rdma-mlx5-add-work-to-remove-temporary-entries-from-.patch b/queue-6.1/rdma-mlx5-add-work-to-remove-temporary-entries-from-.patch deleted file mode 100644 index 2eba6e0071..0000000000 --- a/queue-6.1/rdma-mlx5-add-work-to-remove-temporary-entries-from-.patch +++ /dev/null @@ -1,315 +0,0 @@ -From be147ad5b5dbf2b210768ce67d652ae3e1d6ddf1 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 26 Jan 2023 00:28:07 +0200 -Subject: RDMA/mlx5: Add work to remove temporary entries from the cache - -From: Michael Guralnik - -[ Upstream commit 627122280c878cf5d3cda2d2c5a0a8f6a7e35cb7 ] - -The non-cache mkeys are stored in the cache only to shorten restarting -application time. Don't store them longer than needed. - -Configure cache entries that store non-cache MRs as temporary entries. If -30 seconds have passed and no user reclaimed the temporarily cached mkeys, -an asynchronous work will destroy the mkeys entries. - -Link: https://lore.kernel.org/r/20230125222807.6921-7-michaelgur@nvidia.com -Signed-off-by: Michael Guralnik -Signed-off-by: Jason Gunthorpe -Stable-dep-of: d97505baea64 ("RDMA/mlx5: Fix the recovery flow of the UMR QP") -Signed-off-by: Sasha Levin ---- - drivers/infiniband/hw/mlx5/mlx5_ib.h | 9 ++- - drivers/infiniband/hw/mlx5/mr.c | 94 ++++++++++++++++++++++------ - drivers/infiniband/hw/mlx5/odp.c | 2 +- - 3 files changed, 82 insertions(+), 23 deletions(-) - -diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h -index f345e2ae394d2..7c72e0e9db54a 100644 ---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h -+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h -@@ -770,6 +770,7 @@ struct mlx5_cache_ent { - struct rb_node node; - struct mlx5r_cache_rb_key rb_key; - -+ u8 is_tmp:1; - u8 disabled:1; - u8 fill_to_high_water:1; - -@@ -803,6 +804,7 @@ struct mlx5_mkey_cache { - struct mutex rb_lock; - struct dentry *fs_root; - unsigned long last_add; -+ struct delayed_work remove_ent_dwork; - }; - - struct mlx5_ib_port_resources { -@@ -1346,9 +1348,10 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); - int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); - int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev); - int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev); --struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev, -- struct mlx5r_cache_rb_key rb_key, -- bool persistent_entry); -+struct mlx5_cache_ent * -+mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev, -+ struct mlx5r_cache_rb_key rb_key, -+ bool persistent_entry); - - struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - int access_flags, int access_mode, -diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c -index bf1ca7565be67..2c1a935734273 100644 ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -140,19 +140,16 @@ static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out) - mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out); - } - -- --static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings, -- void *to_store) -+static int push_mkey_locked(struct mlx5_cache_ent *ent, bool 
limit_pendings, -+ void *to_store) - { - XA_STATE(xas, &ent->mkeys, 0); - void *curr; - -- xa_lock_irq(&ent->mkeys); - if (limit_pendings && -- (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) { -- xa_unlock_irq(&ent->mkeys); -+ (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) - return -EAGAIN; -- } -+ - while (1) { - /* - * This is cmpxchg (NULL, XA_ZERO_ENTRY) however this version -@@ -191,6 +188,7 @@ static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings, - break; - xa_lock_irq(&ent->mkeys); - } -+ xa_lock_irq(&ent->mkeys); - if (xas_error(&xas)) - return xas_error(&xas); - if (WARN_ON(curr)) -@@ -198,6 +196,17 @@ static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings, - return 0; - } - -+static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings, -+ void *to_store) -+{ -+ int ret; -+ -+ xa_lock_irq(&ent->mkeys); -+ ret = push_mkey_locked(ent, limit_pendings, to_store); -+ xa_unlock_irq(&ent->mkeys); -+ return ret; -+} -+ - static void undo_push_reserve_mkey(struct mlx5_cache_ent *ent) - { - void *old; -@@ -545,7 +554,7 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) - { - lockdep_assert_held(&ent->mkeys.xa_lock); - -- if (ent->disabled || READ_ONCE(ent->dev->fill_delay)) -+ if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp) - return; - if (ent->stored < ent->limit) { - ent->fill_to_high_water = true; -@@ -675,7 +684,6 @@ static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache, - struct mlx5_cache_ent *cur; - int cmp; - -- mutex_lock(&cache->rb_lock); - /* Figure out where to put new node */ - while (*new) { - cur = rb_entry(*new, struct mlx5_cache_ent, node); -@@ -695,7 +703,6 @@ static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache, - rb_link_node(&ent->node, parent, new); - rb_insert_color(&ent->node, &cache->rb_root); - -- mutex_unlock(&cache->rb_lock); - return 0; - } - -@@ -867,9 +874,10 @@ static void delay_time_func(struct timer_list *t) - WRITE_ONCE(dev->fill_delay, 0); - } - --struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev, -- struct mlx5r_cache_rb_key rb_key, -- bool persistent_entry) -+struct mlx5_cache_ent * -+mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev, -+ struct mlx5r_cache_rb_key rb_key, -+ bool persistent_entry) - { - struct mlx5_cache_ent *ent; - int order; -@@ -882,6 +890,7 @@ struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev, - xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ); - ent->rb_key = rb_key; - ent->dev = dev; -+ ent->is_tmp = !persistent_entry; - - INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); - -@@ -905,11 +914,44 @@ struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev, - ent->limit = 0; - - mlx5_mkey_cache_debugfs_add_ent(dev, ent); -+ } else { -+ mod_delayed_work(ent->dev->cache.wq, -+ &ent->dev->cache.remove_ent_dwork, -+ msecs_to_jiffies(30 * 1000)); - } - - return ent; - } - -+static void remove_ent_work_func(struct work_struct *work) -+{ -+ struct mlx5_mkey_cache *cache; -+ struct mlx5_cache_ent *ent; -+ struct rb_node *cur; -+ -+ cache = container_of(work, struct mlx5_mkey_cache, -+ remove_ent_dwork.work); -+ mutex_lock(&cache->rb_lock); -+ cur = rb_last(&cache->rb_root); -+ while (cur) { -+ ent = rb_entry(cur, struct mlx5_cache_ent, node); -+ cur = rb_prev(cur); -+ mutex_unlock(&cache->rb_lock); -+ -+ xa_lock_irq(&ent->mkeys); -+ if (!ent->is_tmp) { -+ xa_unlock_irq(&ent->mkeys); -+ mutex_lock(&cache->rb_lock); -+ continue; -+ } -+ xa_unlock_irq(&ent->mkeys); -+ -+ 
clean_keys(ent->dev, ent); -+ mutex_lock(&cache->rb_lock); -+ } -+ mutex_unlock(&cache->rb_lock); -+} -+ - int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) - { - struct mlx5_mkey_cache *cache = &dev->cache; -@@ -925,6 +967,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) - mutex_init(&dev->slow_path_mutex); - mutex_init(&dev->cache.rb_lock); - dev->cache.rb_root = RB_ROOT; -+ INIT_DELAYED_WORK(&dev->cache.remove_ent_dwork, remove_ent_work_func); - cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); - if (!cache->wq) { - mlx5_ib_warn(dev, "failed to create work queue\n"); -@@ -934,9 +977,10 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) - mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); - timer_setup(&dev->delay_timer, delay_time_func, 0); - mlx5_mkey_cache_debugfs_init(dev); -+ mutex_lock(&cache->rb_lock); - for (i = 0; i <= mkey_cache_max_order(dev); i++) { - rb_key.ndescs = 1 << (i + 2); -- ent = mlx5r_cache_create_ent(dev, rb_key, true); -+ ent = mlx5r_cache_create_ent_locked(dev, rb_key, true); - if (IS_ERR(ent)) { - ret = PTR_ERR(ent); - goto err; -@@ -947,6 +991,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) - if (ret) - goto err; - -+ mutex_unlock(&cache->rb_lock); - for (node = rb_first(root); node; node = rb_next(node)) { - ent = rb_entry(node, struct mlx5_cache_ent, node); - xa_lock_irq(&ent->mkeys); -@@ -957,6 +1002,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) - return 0; - - err: -+ mutex_unlock(&cache->rb_lock); - mlx5_ib_warn(dev, "failed to create mkey cache entry\n"); - return ret; - } -@@ -970,6 +1016,7 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) - if (!dev->cache.wq) - return 0; - -+ cancel_delayed_work_sync(&dev->cache.remove_ent_dwork); - mutex_lock(&dev->cache.rb_lock); - for (node = rb_first(root); node; node = rb_next(node)) { - ent = rb_entry(node, struct mlx5_cache_ent, node); -@@ -1752,33 +1799,42 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, - { - struct mlx5_mkey_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; -+ int ret; - - if (mr->mmkey.cache_ent) { - xa_lock_irq(&mr->mmkey.cache_ent->mkeys); - mr->mmkey.cache_ent->in_use--; -- xa_unlock_irq(&mr->mmkey.cache_ent->mkeys); - goto end; - } - - mutex_lock(&cache->rb_lock); - ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key); -- mutex_unlock(&cache->rb_lock); - if (ent) { - if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) { -+ if (ent->disabled) { -+ mutex_unlock(&cache->rb_lock); -+ return -EOPNOTSUPP; -+ } - mr->mmkey.cache_ent = ent; -+ xa_lock_irq(&mr->mmkey.cache_ent->mkeys); -+ mutex_unlock(&cache->rb_lock); - goto end; - } - } - -- ent = mlx5r_cache_create_ent(dev, mr->mmkey.rb_key, false); -+ ent = mlx5r_cache_create_ent_locked(dev, mr->mmkey.rb_key, false); -+ mutex_unlock(&cache->rb_lock); - if (IS_ERR(ent)) - return PTR_ERR(ent); - - mr->mmkey.cache_ent = ent; -+ xa_lock_irq(&mr->mmkey.cache_ent->mkeys); - - end: -- return push_mkey(mr->mmkey.cache_ent, false, -- xa_mk_value(mr->mmkey.key)); -+ ret = push_mkey_locked(mr->mmkey.cache_ent, false, -+ xa_mk_value(mr->mmkey.key)); -+ xa_unlock_irq(&mr->mmkey.cache_ent->mkeys); -+ return ret; - } - - int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) -diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c -index 96d4faabbff8a..6ba4aa1afdc2d 100644 ---- a/drivers/infiniband/hw/mlx5/odp.c -+++ b/drivers/infiniband/hw/mlx5/odp.c -@@ -1602,7 +1602,7 @@ int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev) - if 
(!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) - return 0; - -- ent = mlx5r_cache_create_ent(dev, rb_key, true); -+ ent = mlx5r_cache_create_ent_locked(dev, rb_key, true); - if (IS_ERR(ent)) - return PTR_ERR(ent); - --- -2.39.5 - diff --git a/queue-6.1/rdma-mlx5-cache-all-user-cacheable-mkeys-on-dereg-mr.patch b/queue-6.1/rdma-mlx5-cache-all-user-cacheable-mkeys-on-dereg-mr.patch deleted file mode 100644 index c6c37afc4a..0000000000 --- a/queue-6.1/rdma-mlx5-cache-all-user-cacheable-mkeys-on-dereg-mr.patch +++ /dev/null @@ -1,148 +0,0 @@ -From 5a09f0237455bc487c3d8cb78b82b7263d23d8fe Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 26 Jan 2023 00:28:06 +0200 -Subject: RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow - -From: Michael Guralnik - -[ Upstream commit dd1b913fb0d0e3e6d55e92d2319d954474dd66ac ] - -Currently, when dereging an MR, if the mkey doesn't belong to a cache -entry, it will be destroyed. As a result, the restart of applications -with many non-cached mkeys is not efficient since all the mkeys are -destroyed and then recreated. This process takes a long time (for 100,000 -MRs, it is ~20 seconds for dereg and ~28 seconds for re-reg). - -To shorten the restart runtime, insert all cacheable mkeys to the cache. -If there is no fitting entry to the mkey properties, create a temporary -entry that fits it. - -After a predetermined timeout, the cache entries will shrink to the -initial high limit. - -The mkeys will still be in the cache when consuming them again after an -application restart. Therefore, the registration will be much faster -(for 100,000 MRs, it is ~4 seconds for dereg and ~5 seconds for re-reg). - -The temporary cache entries created to store the non-cache mkeys are not -exposed through sysfs like the default cache entries. - -Link: https://lore.kernel.org/r/20230125222807.6921-6-michaelgur@nvidia.com -Signed-off-by: Michael Guralnik -Signed-off-by: Jason Gunthorpe -Stable-dep-of: d97505baea64 ("RDMA/mlx5: Fix the recovery flow of the UMR QP") -Signed-off-by: Sasha Levin ---- - drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 + - drivers/infiniband/hw/mlx5/mr.c | 55 +++++++++++++++++++++------- - 2 files changed, 44 insertions(+), 13 deletions(-) - -diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h -index 7c9d5648947e9..f345e2ae394d2 100644 ---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h -+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h -@@ -650,6 +650,8 @@ struct mlx5_ib_mkey { - unsigned int ndescs; - struct wait_queue_head wait; - refcount_t usecount; -+ /* User Mkey must hold either a rb_key or a cache_ent. */ -+ struct mlx5r_cache_rb_key rb_key; - struct mlx5_cache_ent *cache_ent; - }; - -diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c -index 1060b30a837a0..bf1ca7565be67 100644 ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -1110,15 +1110,14 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, - rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags); - ent = mkey_cache_ent_from_rb_key(dev, rb_key); - /* -- * Matches access in alloc_cache_mr(). If the MR can't come from the -- * cache then synchronously create an uncached one. -+ * If the MR can't come from the cache then synchronously create an uncached -+ * one. 
- */ -- if (!ent || ent->limit == 0 || -- !mlx5r_umr_can_reconfig(dev, 0, access_flags) || -- mlx5_umem_needs_ats(dev, umem, access_flags)) { -+ if (!ent) { - mutex_lock(&dev->slow_path_mutex); - mr = reg_create(pd, umem, iova, access_flags, page_size, false); - mutex_unlock(&dev->slow_path_mutex); -+ mr->mmkey.rb_key = rb_key; - return mr; - } - -@@ -1209,6 +1208,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, - goto err_2; - } - mr->mmkey.type = MLX5_MKEY_MR; -+ mr->mmkey.ndescs = get_octo_len(iova, umem->length, mr->page_shift); - mr->umem = umem; - set_mr_fields(dev, mr, umem->length, access_flags, iova); - kvfree(in); -@@ -1747,6 +1747,40 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) - } - } - -+static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, -+ struct mlx5_ib_mr *mr) -+{ -+ struct mlx5_mkey_cache *cache = &dev->cache; -+ struct mlx5_cache_ent *ent; -+ -+ if (mr->mmkey.cache_ent) { -+ xa_lock_irq(&mr->mmkey.cache_ent->mkeys); -+ mr->mmkey.cache_ent->in_use--; -+ xa_unlock_irq(&mr->mmkey.cache_ent->mkeys); -+ goto end; -+ } -+ -+ mutex_lock(&cache->rb_lock); -+ ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key); -+ mutex_unlock(&cache->rb_lock); -+ if (ent) { -+ if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) { -+ mr->mmkey.cache_ent = ent; -+ goto end; -+ } -+ } -+ -+ ent = mlx5r_cache_create_ent(dev, mr->mmkey.rb_key, false); -+ if (IS_ERR(ent)) -+ return PTR_ERR(ent); -+ -+ mr->mmkey.cache_ent = ent; -+ -+end: -+ return push_mkey(mr->mmkey.cache_ent, false, -+ xa_mk_value(mr->mmkey.key)); -+} -+ - int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) - { - struct mlx5_ib_mr *mr = to_mmr(ibmr); -@@ -1792,16 +1826,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) - } - - /* Stop DMA */ -- if (mr->mmkey.cache_ent) { -- xa_lock_irq(&mr->mmkey.cache_ent->mkeys); -- mr->mmkey.cache_ent->in_use--; -- xa_unlock_irq(&mr->mmkey.cache_ent->mkeys); -- -+ if (mr->umem && mlx5r_umr_can_load_pas(dev, mr->umem->length)) - if (mlx5r_umr_revoke_mr(mr) || -- push_mkey(mr->mmkey.cache_ent, false, -- xa_mk_value(mr->mmkey.key))) -+ cache_ent_find_and_store(dev, mr)) - mr->mmkey.cache_ent = NULL; -- } -+ - if (!mr->mmkey.cache_ent) { - rc = destroy_mkey(to_mdev(mr->ibmr.device), mr); - if (rc) --- -2.39.5 - diff --git a/queue-6.1/rdma-mlx5-change-check-for-cacheable-mkeys.patch b/queue-6.1/rdma-mlx5-change-check-for-cacheable-mkeys.patch deleted file mode 100644 index c7b2df5ba0..0000000000 --- a/queue-6.1/rdma-mlx5-change-check-for-cacheable-mkeys.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 8c1185fef68cc603b954fece2a434c9f851d6a86 Mon Sep 17 00:00:00 2001 -From: Or Har-Toov -Date: Wed, 3 Apr 2024 13:36:00 +0300 -Subject: RDMA/mlx5: Change check for cacheable mkeys - -From: Or Har-Toov - -commit 8c1185fef68cc603b954fece2a434c9f851d6a86 upstream. - -umem can be NULL for user application mkeys in some cases. Therefore -umem can't be used for checking if the mkey is cacheable and it is -changed for checking a flag that indicates it. Also make sure that -all mkeys which are not returned to the cache will be destroyed. 
- -Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow") -Signed-off-by: Or Har-Toov -Link: https://lore.kernel.org/r/2690bc5c6896bcb937f89af16a1ff0343a7ab3d0.1712140377.git.leon@kernel.org -Signed-off-by: Leon Romanovsky -Signed-off-by: Greg Kroah-Hartman ---- - drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + - drivers/infiniband/hw/mlx5/mr.c | 32 ++++++++++++++++++++++---------- - 2 files changed, 23 insertions(+), 10 deletions(-) - ---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h -+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h -@@ -654,6 +654,7 @@ struct mlx5_ib_mkey { - /* Cacheable user Mkey must hold either a rb_key or a cache_ent. */ - struct mlx5r_cache_rb_key rb_key; - struct mlx5_cache_ent *cache_ent; -+ u8 cacheable : 1; - }; - - #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -1155,6 +1155,7 @@ static struct mlx5_ib_mr *alloc_cacheabl - if (IS_ERR(mr)) - return mr; - mr->mmkey.rb_key = rb_key; -+ mr->mmkey.cacheable = true; - return mr; - } - -@@ -1165,6 +1166,7 @@ static struct mlx5_ib_mr *alloc_cacheabl - mr->ibmr.pd = pd; - mr->umem = umem; - mr->page_shift = order_base_2(page_size); -+ mr->mmkey.cacheable = true; - set_mr_fields(dev, mr, umem->length, access_flags, iova); - - return mr; -@@ -1830,6 +1832,23 @@ end: - return ret; - } - -+static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) -+{ -+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); -+ struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; -+ -+ if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) -+ return 0; -+ -+ if (ent) { -+ spin_lock_irq(&ent->mkeys_queue.lock); -+ ent->in_use--; -+ mr->mmkey.cache_ent = NULL; -+ spin_unlock_irq(&ent->mkeys_queue.lock); -+ } -+ return destroy_mkey(dev, mr); -+} -+ - int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) - { - struct mlx5_ib_mr *mr = to_mmr(ibmr); -@@ -1875,16 +1894,9 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, - } - - /* Stop DMA */ -- if (mr->umem && mlx5r_umr_can_load_pas(dev, mr->umem->length)) -- if (mlx5r_umr_revoke_mr(mr) || -- cache_ent_find_and_store(dev, mr)) -- mr->mmkey.cache_ent = NULL; -- -- if (!mr->mmkey.cache_ent) { -- rc = destroy_mkey(to_mdev(mr->ibmr.device), mr); -- if (rc) -- return rc; -- } -+ rc = mlx5_revoke_mr(mr); -+ if (rc) -+ return rc; - - if (mr->umem) { - bool is_odp = is_odp_mr(mr); diff --git a/queue-6.1/rdma-mlx5-change-the-cache-structure-to-an-rb-tree.patch b/queue-6.1/rdma-mlx5-change-the-cache-structure-to-an-rb-tree.patch deleted file mode 100644 index c76604c4fc..0000000000 --- a/queue-6.1/rdma-mlx5-change-the-cache-structure-to-an-rb-tree.patch +++ /dev/null @@ -1,354 +0,0 @@ -From 3a78949c3d99afa32e87cf8cfe46723a057ee4cb Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 26 Jan 2023 00:28:04 +0200 -Subject: RDMA/mlx5: Change the cache structure to an RB-tree - -From: Michael Guralnik - -[ Upstream commit b9584517832858a0f78d6851d09b697a829514cd ] - -Currently, the cache structure is a static linear array. Therefore, his -size is limited to the number of entries in it and is not expandable. The -entries are dedicated to mkeys of size 2^x and no access_flags. Mkeys with -different properties are not cacheable. - -In this patch, we change the cache structure to an RB-tree. This will -allow to extend the cache to support more entries with different mkey -properties. 
- -Link: https://lore.kernel.org/r/20230125222807.6921-4-michaelgur@nvidia.com -Signed-off-by: Michael Guralnik -Signed-off-by: Jason Gunthorpe -Stable-dep-of: d97505baea64 ("RDMA/mlx5: Fix the recovery flow of the UMR QP") -Signed-off-by: Sasha Levin ---- - drivers/infiniband/hw/mlx5/mlx5_ib.h | 11 +- - drivers/infiniband/hw/mlx5/mr.c | 160 ++++++++++++++++++++------- - drivers/infiniband/hw/mlx5/odp.c | 8 +- - 3 files changed, 132 insertions(+), 47 deletions(-) - -diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h -index 10c87901da27c..bd998ac8c29c1 100644 ---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h -+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h -@@ -761,6 +761,8 @@ struct mlx5_cache_ent { - u32 access_mode; - unsigned int ndescs; - -+ struct rb_node node; -+ - u8 disabled:1; - u8 fill_to_high_water:1; - -@@ -790,8 +792,9 @@ struct mlx5r_async_create_mkey { - - struct mlx5_mkey_cache { - struct workqueue_struct *wq; -- struct mlx5_cache_ent ent[MAX_MKEY_CACHE_ENTRIES]; -- struct dentry *root; -+ struct rb_root rb_root; -+ struct mutex rb_lock; -+ struct dentry *fs_root; - unsigned long last_add; - }; - -@@ -1336,11 +1339,15 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); - int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); - int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev); - int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev); -+struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev, -+ int order); - - struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - struct mlx5_cache_ent *ent, - int access_flags); - -+struct mlx5_ib_mr *mlx5_mr_cache_alloc_order(struct mlx5_ib_dev *dev, u32 order, -+ int access_flags); - int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, - struct ib_mr_status *mr_status); - struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, -diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c -index 53fadd6edb68d..b3d83920d3cfb 100644 ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -515,18 +515,22 @@ static const struct file_operations limit_fops = { - - static bool someone_adding(struct mlx5_mkey_cache *cache) - { -- unsigned int i; -- -- for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) { -- struct mlx5_cache_ent *ent = &cache->ent[i]; -- bool ret; -+ struct mlx5_cache_ent *ent; -+ struct rb_node *node; -+ bool ret; - -+ mutex_lock(&cache->rb_lock); -+ for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) { -+ ent = rb_entry(node, struct mlx5_cache_ent, node); - xa_lock_irq(&ent->mkeys); - ret = ent->stored < ent->limit; - xa_unlock_irq(&ent->mkeys); -- if (ret) -+ if (ret) { -+ mutex_unlock(&cache->rb_lock); - return true; -+ } - } -+ mutex_unlock(&cache->rb_lock); - return false; - } - -@@ -637,6 +641,59 @@ static void delayed_cache_work_func(struct work_struct *work) - __cache_work_func(ent); - } - -+static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache, -+ struct mlx5_cache_ent *ent) -+{ -+ struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL; -+ struct mlx5_cache_ent *cur; -+ -+ mutex_lock(&cache->rb_lock); -+ /* Figure out where to put new node */ -+ while (*new) { -+ cur = rb_entry(*new, struct mlx5_cache_ent, node); -+ parent = *new; -+ if (ent->order < cur->order) -+ new = &((*new)->rb_left); -+ if (ent->order > cur->order) -+ new = &((*new)->rb_right); -+ if (ent->order == cur->order) { -+ mutex_unlock(&cache->rb_lock); -+ return -EEXIST; -+ } -+ } -+ -+ /* Add new node and rebalance tree. 
*/ -+ rb_link_node(&ent->node, parent, new); -+ rb_insert_color(&ent->node, &cache->rb_root); -+ -+ mutex_unlock(&cache->rb_lock); -+ return 0; -+} -+ -+static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev, -+ unsigned int order) -+{ -+ struct rb_node *node = dev->cache.rb_root.rb_node; -+ struct mlx5_cache_ent *cur, *smallest = NULL; -+ -+ /* -+ * Find the smallest ent with order >= requested_order. -+ */ -+ while (node) { -+ cur = rb_entry(node, struct mlx5_cache_ent, node); -+ if (cur->order > order) { -+ smallest = cur; -+ node = node->rb_left; -+ } -+ if (cur->order < order) -+ node = node->rb_right; -+ if (cur->order == order) -+ return cur; -+ } -+ -+ return smallest; -+} -+ - struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - struct mlx5_cache_ent *ent, - int access_flags) -@@ -677,10 +734,16 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - return mr; - } - --static void clean_keys(struct mlx5_ib_dev *dev, int c) -+struct mlx5_ib_mr *mlx5_mr_cache_alloc_order(struct mlx5_ib_dev *dev, -+ u32 order, int access_flags) -+{ -+ struct mlx5_cache_ent *ent = mkey_cache_ent_from_order(dev, order); -+ -+ return mlx5_mr_cache_alloc(dev, ent, access_flags); -+} -+ -+static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) - { -- struct mlx5_mkey_cache *cache = &dev->cache; -- struct mlx5_cache_ent *ent = &cache->ent[c]; - u32 mkey; - - cancel_delayed_work(&ent->dwork); -@@ -699,8 +762,8 @@ static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) - if (!mlx5_debugfs_root || dev->is_rep) - return; - -- debugfs_remove_recursive(dev->cache.root); -- dev->cache.root = NULL; -+ debugfs_remove_recursive(dev->cache.fs_root); -+ dev->cache.fs_root = NULL; - } - - static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev) -@@ -713,12 +776,13 @@ static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev) - if (!mlx5_debugfs_root || dev->is_rep) - return; - -- cache->root = debugfs_create_dir("mr_cache", mlx5_debugfs_get_dev_root(dev->mdev)); -+ dir = mlx5_debugfs_get_dev_root(dev->mdev); -+ cache->fs_root = debugfs_create_dir("mr_cache", dir); - - for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) { -- ent = &cache->ent[i]; -+ ent = mkey_cache_ent_from_order(dev, i); - sprintf(ent->name, "%d", ent->order); -- dir = debugfs_create_dir(ent->name, cache->root); -+ dir = debugfs_create_dir(ent->name, cache->fs_root); - debugfs_create_file("size", 0600, dir, ent, &size_fops); - debugfs_create_file("limit", 0600, dir, ent, &limit_fops); - debugfs_create_ulong("cur", 0400, dir, &ent->stored); -@@ -733,6 +797,30 @@ static void delay_time_func(struct timer_list *t) - WRITE_ONCE(dev->fill_delay, 0); - } - -+struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev, -+ int order) -+{ -+ struct mlx5_cache_ent *ent; -+ int ret; -+ -+ ent = kzalloc(sizeof(*ent), GFP_KERNEL); -+ if (!ent) -+ return ERR_PTR(-ENOMEM); -+ -+ xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ); -+ ent->order = order; -+ ent->dev = dev; -+ -+ INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); -+ -+ ret = mlx5_cache_ent_insert(&dev->cache, ent); -+ if (ret) { -+ kfree(ent); -+ return ERR_PTR(ret); -+ } -+ return ent; -+} -+ - int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) - { - struct mlx5_mkey_cache *cache = &dev->cache; -@@ -740,6 +828,8 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) - int i; - - mutex_init(&dev->slow_path_mutex); -+ mutex_init(&dev->cache.rb_lock); -+ dev->cache.rb_root = RB_ROOT; - cache->wq = 
alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); - if (!cache->wq) { - mlx5_ib_warn(dev, "failed to create work queue\n"); -@@ -749,13 +839,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) - mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); - timer_setup(&dev->delay_timer, delay_time_func, 0); - for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) { -- ent = &cache->ent[i]; -- xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ); -- ent->order = i + 2; -- ent->dev = dev; -- ent->limit = 0; -- -- INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); -+ ent = mlx5r_cache_create_ent(dev, i); - - if (i > MKEY_CACHE_LAST_STD_ENTRY) { - mlx5_odp_init_mkey_cache_entry(ent); -@@ -785,14 +869,16 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) - - int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) - { -- unsigned int i; -+ struct rb_root *root = &dev->cache.rb_root; -+ struct mlx5_cache_ent *ent; -+ struct rb_node *node; - - if (!dev->cache.wq) - return 0; - -- for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) { -- struct mlx5_cache_ent *ent = &dev->cache.ent[i]; -- -+ mutex_lock(&dev->cache.rb_lock); -+ for (node = rb_first(root); node; node = rb_next(node)) { -+ ent = rb_entry(node, struct mlx5_cache_ent, node); - xa_lock_irq(&ent->mkeys); - ent->disabled = true; - xa_unlock_irq(&ent->mkeys); -@@ -802,8 +888,15 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) - mlx5_mkey_cache_debugfs_cleanup(dev); - mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); - -- for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) -- clean_keys(dev, i); -+ node = rb_first(root); -+ while (node) { -+ ent = rb_entry(node, struct mlx5_cache_ent, node); -+ node = rb_next(node); -+ clean_keys(dev, ent); -+ rb_erase(&ent->node, root); -+ kfree(ent); -+ } -+ mutex_unlock(&dev->cache.rb_lock); - - destroy_workqueue(dev->cache.wq); - del_timer_sync(&dev->delay_timer); -@@ -876,19 +969,6 @@ static int mkey_cache_max_order(struct mlx5_ib_dev *dev) - return MLX5_MAX_UMR_SHIFT; - } - --static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev, -- unsigned int order) --{ -- struct mlx5_mkey_cache *cache = &dev->cache; -- -- if (order < cache->ent[0].order) -- return &cache->ent[0]; -- order = order - cache->ent[0].order; -- if (order > MKEY_CACHE_LAST_STD_ENTRY) -- return NULL; -- return &cache->ent[order]; --} -- - static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - u64 length, int access_flags, u64 iova) - { -diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c -index 5f0a17382de73..7f68940ca0d1e 100644 ---- a/drivers/infiniband/hw/mlx5/odp.c -+++ b/drivers/infiniband/hw/mlx5/odp.c -@@ -420,8 +420,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, - return ERR_CAST(odp); - - BUILD_BUG_ON(order > MKEY_CACHE_LAST_STD_ENTRY); -- mr = mlx5_mr_cache_alloc(dev, &dev->cache.ent[order], -- imr->access_flags); -+ mr = mlx5_mr_cache_alloc_order(dev, order, imr->access_flags); - if (IS_ERR(mr)) { - ib_umem_odp_release(odp); - return mr; -@@ -495,9 +494,8 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, - if (IS_ERR(umem_odp)) - return ERR_CAST(umem_odp); - -- imr = mlx5_mr_cache_alloc(dev, -- &dev->cache.ent[MLX5_IMR_KSM_CACHE_ENTRY], -- access_flags); -+ imr = mlx5_mr_cache_alloc_order(dev, MLX5_IMR_KSM_CACHE_ENTRY, -+ access_flags); - if (IS_ERR(imr)) { - ib_umem_odp_release(umem_odp); - return imr; --- -2.39.5 - diff --git a/queue-6.1/rdma-mlx5-check-reg_create-create-for-errors.patch 
b/queue-6.1/rdma-mlx5-check-reg_create-create-for-errors.patch deleted file mode 100644 index 666ce7fb24..0000000000 --- a/queue-6.1/rdma-mlx5-check-reg_create-create-for-errors.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 8e6e49ccf1a0f2b3257394dc8610bb6d48859d3f Mon Sep 17 00:00:00 2001 -From: Dan Carpenter -Date: Mon, 6 Feb 2023 17:40:35 +0300 -Subject: RDMA/mlx5: Check reg_create() create for errors - -From: Dan Carpenter - -commit 8e6e49ccf1a0f2b3257394dc8610bb6d48859d3f upstream. - -The reg_create() can fail. Check for errors before dereferencing it. - -Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow") -Signed-off-by: Dan Carpenter -Link: https://lore.kernel.org/r/Y+ERYy4wN0LsKsm+@kili -Reviewed-by: Devesh Sharma -Signed-off-by: Leon Romanovsky -Signed-off-by: Greg Kroah-Hartman ---- - drivers/infiniband/hw/mlx5/mr.c | 2 ++ - 1 file changed, 2 insertions(+) - ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -1143,6 +1143,8 @@ static struct mlx5_ib_mr *alloc_cacheabl - mutex_lock(&dev->slow_path_mutex); - mr = reg_create(pd, umem, iova, access_flags, page_size, false); - mutex_unlock(&dev->slow_path_mutex); -+ if (IS_ERR(mr)) -+ return mr; - mr->mmkey.rb_key = rb_key; - return mr; - } diff --git a/queue-6.1/rdma-mlx5-don-t-keep-umrable-page_shift-in-cache-ent.patch b/queue-6.1/rdma-mlx5-don-t-keep-umrable-page_shift-in-cache-ent.patch deleted file mode 100644 index 2caa17ca8b..0000000000 --- a/queue-6.1/rdma-mlx5-don-t-keep-umrable-page_shift-in-cache-ent.patch +++ /dev/null @@ -1,83 +0,0 @@ -From a85b91bcb6fce39a7511353461ead5a60b13bc69 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 26 Jan 2023 00:28:02 +0200 -Subject: RDMA/mlx5: Don't keep umrable 'page_shift' in cache entries - -From: Aharon Landau - -[ Upstream commit a2a88b8e22d1b202225d0e40b02ad068afab2ccb ] - -mkc.log_page_size can be changed using UMR. Therefore, don't treat it as a -cache entry property. - -Removing it from struct mlx5_cache_ent. - -All cache mkeys will be created with default PAGE_SHIFT, and updated with -the needed page_shift using UMR when passing them to a user. - -Link: https://lore.kernel.org/r/20230125222807.6921-2-michaelgur@nvidia.com -Signed-off-by: Aharon Landau -Signed-off-by: Jason Gunthorpe -Stable-dep-of: d97505baea64 ("RDMA/mlx5: Fix the recovery flow of the UMR QP") -Signed-off-by: Sasha Levin ---- - drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - - drivers/infiniband/hw/mlx5/mr.c | 3 +-- - drivers/infiniband/hw/mlx5/odp.c | 2 -- - 3 files changed, 1 insertion(+), 5 deletions(-) - -diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h -index 0ef347e91ffeb..10c87901da27c 100644 ---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h -+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h -@@ -759,7 +759,6 @@ struct mlx5_cache_ent { - char name[4]; - u32 order; - u32 access_mode; -- u32 page; - unsigned int ndescs; - - u8 disabled:1; -diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c -index b81b03aa2a629..53fadd6edb68d 100644 ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -297,7 +297,7 @@ static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) - - MLX5_SET(mkc, mkc, translations_octword_size, - get_mkc_octo_size(ent->access_mode, ent->ndescs)); -- MLX5_SET(mkc, mkc, log_page_size, ent->page); -+ MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); - } - - /* Asynchronously schedule new MRs to be populated in the cache. 
*/ -@@ -765,7 +765,6 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) - if (ent->order > mkey_cache_max_order(dev)) - continue; - -- ent->page = PAGE_SHIFT; - ent->ndescs = 1 << ent->order; - ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; - if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && -diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c -index 87fbee8061003..a5c9baec8be85 100644 ---- a/drivers/infiniband/hw/mlx5/odp.c -+++ b/drivers/infiniband/hw/mlx5/odp.c -@@ -1598,14 +1598,12 @@ void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) - - switch (ent->order - 2) { - case MLX5_IMR_MTT_CACHE_ENTRY: -- ent->page = PAGE_SHIFT; - ent->ndescs = MLX5_IMR_MTT_ENTRIES; - ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; - ent->limit = 0; - break; - - case MLX5_IMR_KSM_CACHE_ENTRY: -- ent->page = MLX5_KSM_PAGE_SHIFT; - ent->ndescs = mlx5_imr_ksm_entries; - ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM; - ent->limit = 0; --- -2.39.5 - diff --git a/queue-6.1/rdma-mlx5-ensure-created-mkeys-always-have-a-populated-rb_key.patch b/queue-6.1/rdma-mlx5-ensure-created-mkeys-always-have-a-populated-rb_key.patch deleted file mode 100644 index add9d0b3c3..0000000000 --- a/queue-6.1/rdma-mlx5-ensure-created-mkeys-always-have-a-populated-rb_key.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 2e4c02fdecf2f6f55cefe48cb82d93fa4f8e2204 Mon Sep 17 00:00:00 2001 -From: Jason Gunthorpe -Date: Tue, 28 May 2024 15:52:54 +0300 -Subject: RDMA/mlx5: Ensure created mkeys always have a populated rb_key - -From: Jason Gunthorpe - -commit 2e4c02fdecf2f6f55cefe48cb82d93fa4f8e2204 upstream. - -cachable and mmkey.rb_key together are used by mlx5_revoke_mr() to put the -MR/mkey back into the cache. In all cases they should be set correctly. - -alloc_cacheable_mr() was setting cachable but not filling rb_key, -resulting in cache_ent_find_and_store() bucketing them all into a 0 length -entry. - -implicit_get_child_mr()/mlx5_ib_alloc_implicit_mr() failed to set cachable -or rb_key at all, so the cache was not working at all for implicit ODP. 
- -Cc: stable@vger.kernel.org -Fixes: 8c1185fef68c ("RDMA/mlx5: Change check for cacheable mkeys") -Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow") -Signed-off-by: Jason Gunthorpe -Link: https://lore.kernel.org/r/7778c02dfa0999a30d6746c79a23dd7140a9c729.1716900410.git.leon@kernel.org -Signed-off-by: Leon Romanovsky -Signed-off-by: Greg Kroah-Hartman ---- - drivers/infiniband/hw/mlx5/mr.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -715,6 +715,8 @@ static struct mlx5_ib_mr *_mlx5_mr_cache - } - mr->mmkey.cache_ent = ent; - mr->mmkey.type = MLX5_MKEY_MR; -+ mr->mmkey.rb_key = ent->rb_key; -+ mr->mmkey.cacheable = true; - init_waitqueue_head(&mr->mmkey.wait); - return mr; - } -@@ -1165,7 +1167,6 @@ static struct mlx5_ib_mr *alloc_cacheabl - mr->ibmr.pd = pd; - mr->umem = umem; - mr->page_shift = order_base_2(page_size); -- mr->mmkey.cacheable = true; - set_mr_fields(dev, mr, umem->length, access_flags, iova); - - return mr; diff --git a/queue-6.1/rdma-mlx5-fix-ah-static-rate-parsing.patch b/queue-6.1/rdma-mlx5-fix-ah-static-rate-parsing.patch deleted file mode 100644 index 7c8747036d..0000000000 --- a/queue-6.1/rdma-mlx5-fix-ah-static-rate-parsing.patch +++ /dev/null @@ -1,84 +0,0 @@ -From b79f406d4cc08e99e836a5e95040672efdba5313 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 10 Feb 2025 13:32:39 +0200 -Subject: RDMA/mlx5: Fix AH static rate parsing - -From: Patrisious Haddad - -[ Upstream commit c534ffda781f44a1c6ac25ef6e0e444da38ca8af ] - -Previously static rate wasn't translated according to our PRM but simply -used the 4 lower bytes. - -Correctly translate static rate value passed in AH creation attribute -according to our PRM expected values. - -In addition change 800GB mapping to zero, which is the PRM -specified value. 
- -Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") -Signed-off-by: Patrisious Haddad -Reviewed-by: Maor Gottlieb -Link: https://patch.msgid.link/18ef4cc5396caf80728341eb74738cd777596f60.1739187089.git.leon@kernel.org -Signed-off-by: Leon Romanovsky -Signed-off-by: Sasha Levin ---- - drivers/infiniband/hw/mlx5/ah.c | 3 ++- - drivers/infiniband/hw/mlx5/qp.c | 6 +++--- - drivers/infiniband/hw/mlx5/qp.h | 1 + - 3 files changed, 6 insertions(+), 4 deletions(-) - -diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c -index 505bc47fd575d..99036afb3aef0 100644 ---- a/drivers/infiniband/hw/mlx5/ah.c -+++ b/drivers/infiniband/hw/mlx5/ah.c -@@ -67,7 +67,8 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, - ah->av.tclass = grh->traffic_class; - } - -- ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4); -+ ah->av.stat_rate_sl = -+ (mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr)) << 4); - - if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { - if (init_attr->xmit_slave) -diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c -index 43c0123babd10..59dca0cd89052 100644 ---- a/drivers/infiniband/hw/mlx5/qp.c -+++ b/drivers/infiniband/hw/mlx5/qp.c -@@ -3379,11 +3379,11 @@ static int ib_to_mlx5_rate_map(u8 rate) - return 0; - } - --static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) -+int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate) - { - u32 stat_rate_support; - -- if (rate == IB_RATE_PORT_CURRENT) -+ if (rate == IB_RATE_PORT_CURRENT || rate == IB_RATE_800_GBPS) - return 0; - - if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS) -@@ -3528,7 +3528,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - sizeof(grh->dgid.raw)); - } - -- err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah)); -+ err = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah)); - if (err < 0) - return err; - MLX5_SET(ads, path, stat_rate, err); -diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h -index e677fa0ca4226..4abb77d551670 100644 ---- a/drivers/infiniband/hw/mlx5/qp.h -+++ b/drivers/infiniband/hw/mlx5/qp.h -@@ -55,4 +55,5 @@ int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn); - int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); - int mlx5_ib_qp_event_init(void); - void mlx5_ib_qp_event_cleanup(void); -+int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate); - #endif /* _MLX5_IB_QP_H */ --- -2.39.5 - diff --git a/queue-6.1/rdma-mlx5-fix-assigning-access-flags-to-cache-mkeys.patch b/queue-6.1/rdma-mlx5-fix-assigning-access-flags-to-cache-mkeys.patch deleted file mode 100644 index 840c967cc2..0000000000 --- a/queue-6.1/rdma-mlx5-fix-assigning-access-flags-to-cache-mkeys.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 4f14c6c0213e1def48f0f887d35f44095416c67d Mon Sep 17 00:00:00 2001 -From: Michael Guralnik -Date: Wed, 20 Sep 2023 13:01:54 +0300 -Subject: RDMA/mlx5: Fix assigning access flags to cache mkeys - -From: Michael Guralnik - -commit 4f14c6c0213e1def48f0f887d35f44095416c67d upstream. - -After the change to use dynamic cache structure, new cache entries -can be added and the mkey allocation can no longer assume that all -mkeys created for the cache have access_flags equal to zero. - -Example of a flow that exposes the issue: -A user registers MR with RO on a HCA that cannot UMR RO and the mkey is -created outside of the cache. When the user deregisters the MR, a new -cache entry is created to store mkeys with RO. 
- -Later, the user registers 2 MRs with RO. The first MR is reused from the -new cache entry. When we try to get the second mkey from the cache we see -the entry is empty so we go to the MR cache mkey allocation flow which -would have allocated a mkey with no access flags, resulting the user getting -a MR without RO. - -Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow") -Reviewed-by: Edward Srouji -Signed-off-by: Michael Guralnik -Link: https://lore.kernel.org/r/8a802700b82def3ace3f77cd7a9ad9d734af87e7.1695203958.git.leonro@nvidia.com -Signed-off-by: Leon Romanovsky -Signed-off-by: Greg Kroah-Hartman ---- - drivers/infiniband/hw/mlx5/mr.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -236,7 +236,8 @@ static int get_mkc_octo_size(unsigned in - - static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) - { -- set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); -+ set_mkc_access_pd_addr_fields(mkc, ent->rb_key.access_flags, 0, -+ ent->dev->umrc.pd); - MLX5_SET(mkc, mkc, free, 1); - MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3); diff --git a/queue-6.1/rdma-mlx5-fix-counter-update-on-mr-cache-mkey-creation.patch b/queue-6.1/rdma-mlx5-fix-counter-update-on-mr-cache-mkey-creation.patch deleted file mode 100644 index 2d8a92613f..0000000000 --- a/queue-6.1/rdma-mlx5-fix-counter-update-on-mr-cache-mkey-creation.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 6f5cd6ac9a4201e4ba6f10b76a9da8044d6e38b0 Mon Sep 17 00:00:00 2001 -From: Michael Guralnik -Date: Tue, 3 Sep 2024 14:24:48 +0300 -Subject: RDMA/mlx5: Fix counter update on MR cache mkey creation - -From: Michael Guralnik - -commit 6f5cd6ac9a4201e4ba6f10b76a9da8044d6e38b0 upstream. - -After an mkey is created, update the counter for pending mkeys before -reshceduling the work that is filling the cache. - -Rescheduling the work with a full MR cache entry and a wrong 'pending' -counter will cause us to miss disabling the fill_to_high_water flag. -Thus leaving the cache full but with an indication that it's still -needs to be filled up to it's full size (2 * limit). -Next time an mkey will be taken from the cache, we'll unnecessarily -continue the process of filling the cache to it's full size. - -Fixes: 57e7071683ef ("RDMA/mlx5: Implement mkeys management via LIFO queue") -Signed-off-by: Michael Guralnik -Link: https://patch.msgid.link/0f44f462ba22e45f72cb3d0ec6a748634086b8d0.1725362530.git.leon@kernel.org -Signed-off-by: Leon Romanovsky -Signed-off-by: Greg Kroah-Hartman ---- - drivers/infiniband/hw/mlx5/mr.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -208,9 +208,9 @@ static void create_mkey_callback(int sta - - spin_lock_irqsave(&ent->mkeys_queue.lock, flags); - push_mkey_locked(ent, mkey_out->mkey); -+ ent->pending--; - /* If we are doing fill_to_high_water then keep going. 
*/ - queue_adjust_cache_locked(ent); -- ent->pending--; - spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags); - kfree(mkey_out); - } diff --git a/queue-6.1/rdma-mlx5-fix-mkey-cache-possible-deadlock-on-cleanup.patch b/queue-6.1/rdma-mlx5-fix-mkey-cache-possible-deadlock-on-cleanup.patch deleted file mode 100644 index 731d75d835..0000000000 --- a/queue-6.1/rdma-mlx5-fix-mkey-cache-possible-deadlock-on-cleanup.patch +++ /dev/null @@ -1,151 +0,0 @@ -From 374012b0045780b7ad498be62e85153009bb7fe9 Mon Sep 17 00:00:00 2001 -From: Shay Drory -Date: Tue, 12 Sep 2023 13:07:45 +0300 -Subject: RDMA/mlx5: Fix mkey cache possible deadlock on cleanup - -From: Shay Drory - -commit 374012b0045780b7ad498be62e85153009bb7fe9 upstream. - -Fix the deadlock by refactoring the MR cache cleanup flow to flush the -workqueue without holding the rb_lock. -This adds a race between cache cleanup and creation of new entries which -we solve by denied creation of new entries after cache cleanup started. - -Lockdep: -WARNING: possible circular locking dependency detected - [ 2785.326074 ] 6.2.0-rc6_for_upstream_debug_2023_01_31_14_02 #1 Not tainted - [ 2785.339778 ] ------------------------------------------------------ - [ 2785.340848 ] devlink/53872 is trying to acquire lock: - [ 2785.341701 ] ffff888124f8c0c8 ((work_completion)(&(&ent->dwork)->work)){+.+.}-{0:0}, at: __flush_work+0xc8/0x900 - [ 2785.343403 ] - [ 2785.343403 ] but task is already holding lock: - [ 2785.344464 ] ffff88817e8f1260 (&dev->cache.rb_lock){+.+.}-{3:3}, at: mlx5_mkey_cache_cleanup+0x77/0x250 [mlx5_ib] - [ 2785.346273 ] - [ 2785.346273 ] which lock already depends on the new lock. - [ 2785.346273 ] - [ 2785.347720 ] - [ 2785.347720 ] the existing dependency chain (in reverse order) is: - [ 2785.349003 ] - [ 2785.349003 ] -> #1 (&dev->cache.rb_lock){+.+.}-{3:3}: - [ 2785.350160 ] __mutex_lock+0x14c/0x15c0 - [ 2785.350962 ] delayed_cache_work_func+0x2d1/0x610 [mlx5_ib] - [ 2785.352044 ] process_one_work+0x7c2/0x1310 - [ 2785.352879 ] worker_thread+0x59d/0xec0 - [ 2785.353636 ] kthread+0x28f/0x330 - [ 2785.354370 ] ret_from_fork+0x1f/0x30 - [ 2785.355135 ] - [ 2785.355135 ] -> #0 ((work_completion)(&(&ent->dwork)->work)){+.+.}-{0:0}: - [ 2785.356515 ] __lock_acquire+0x2d8a/0x5fe0 - [ 2785.357349 ] lock_acquire+0x1c1/0x540 - [ 2785.358121 ] __flush_work+0xe8/0x900 - [ 2785.358852 ] __cancel_work_timer+0x2c7/0x3f0 - [ 2785.359711 ] mlx5_mkey_cache_cleanup+0xfb/0x250 [mlx5_ib] - [ 2785.360781 ] mlx5_ib_stage_pre_ib_reg_umr_cleanup+0x16/0x30 [mlx5_ib] - [ 2785.361969 ] __mlx5_ib_remove+0x68/0x120 [mlx5_ib] - [ 2785.362960 ] mlx5r_remove+0x63/0x80 [mlx5_ib] - [ 2785.363870 ] auxiliary_bus_remove+0x52/0x70 - [ 2785.364715 ] device_release_driver_internal+0x3c1/0x600 - [ 2785.365695 ] bus_remove_device+0x2a5/0x560 - [ 2785.366525 ] device_del+0x492/0xb80 - [ 2785.367276 ] mlx5_detach_device+0x1a9/0x360 [mlx5_core] - [ 2785.368615 ] mlx5_unload_one_devl_locked+0x5a/0x110 [mlx5_core] - [ 2785.369934 ] mlx5_devlink_reload_down+0x292/0x580 [mlx5_core] - [ 2785.371292 ] devlink_reload+0x439/0x590 - [ 2785.372075 ] devlink_nl_cmd_reload+0xaef/0xff0 - [ 2785.372973 ] genl_family_rcv_msg_doit.isra.0+0x1bd/0x290 - [ 2785.374011 ] genl_rcv_msg+0x3ca/0x6c0 - [ 2785.374798 ] netlink_rcv_skb+0x12c/0x360 - [ 2785.375612 ] genl_rcv+0x24/0x40 - [ 2785.376295 ] netlink_unicast+0x438/0x710 - [ 2785.377121 ] netlink_sendmsg+0x7a1/0xca0 - [ 2785.377926 ] sock_sendmsg+0xc5/0x190 - [ 2785.378668 ] __sys_sendto+0x1bc/0x290 - [ 2785.379440 ] __x64_sys_sendto+0xdc/0x1b0 - [ 
2785.380255 ] do_syscall_64+0x3d/0x90 - [ 2785.381031 ] entry_SYSCALL_64_after_hwframe+0x46/0xb0 - [ 2785.381967 ] - [ 2785.381967 ] other info that might help us debug this: - [ 2785.381967 ] - [ 2785.383448 ] Possible unsafe locking scenario: - [ 2785.383448 ] - [ 2785.384544 ] CPU0 CPU1 - [ 2785.385383 ] ---- ---- - [ 2785.386193 ] lock(&dev->cache.rb_lock); - [ 2785.386940 ] lock((work_completion)(&(&ent->dwork)->work)); - [ 2785.388327 ] lock(&dev->cache.rb_lock); - [ 2785.389425 ] lock((work_completion)(&(&ent->dwork)->work)); - [ 2785.390414 ] - [ 2785.390414 ] *** DEADLOCK *** - [ 2785.390414 ] - [ 2785.391579 ] 6 locks held by devlink/53872: - [ 2785.392341 ] #0: ffffffff84c17a50 (cb_lock){++++}-{3:3}, at: genl_rcv+0x15/0x40 - [ 2785.393630 ] #1: ffff888142280218 (&devlink->lock_key){+.+.}-{3:3}, at: devlink_get_from_attrs_lock+0x12d/0x2d0 - [ 2785.395324 ] #2: ffff8881422d3c38 (&dev->lock_key){+.+.}-{3:3}, at: mlx5_unload_one_devl_locked+0x4a/0x110 [mlx5_core] - [ 2785.397322 ] #3: ffffffffa0e59068 (mlx5_intf_mutex){+.+.}-{3:3}, at: mlx5_detach_device+0x60/0x360 [mlx5_core] - [ 2785.399231 ] #4: ffff88810e3cb0e8 (&dev->mutex){....}-{3:3}, at: device_release_driver_internal+0x8d/0x600 - [ 2785.400864 ] #5: ffff88817e8f1260 (&dev->cache.rb_lock){+.+.}-{3:3}, at: mlx5_mkey_cache_cleanup+0x77/0x250 [mlx5_ib] - -Fixes: b95845178328 ("RDMA/mlx5: Change the cache structure to an RB-tree") -Signed-off-by: Shay Drory -Signed-off-by: Michael Guralnik -Signed-off-by: Leon Romanovsky -Signed-off-by: Greg Kroah-Hartman ---- - drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + - drivers/infiniband/hw/mlx5/mr.c | 16 ++++++++++++++-- - 2 files changed, 15 insertions(+), 2 deletions(-) - ---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h -+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h -@@ -821,6 +821,7 @@ struct mlx5_mkey_cache { - struct dentry *fs_root; - unsigned long last_add; - struct delayed_work remove_ent_dwork; -+ u8 disable: 1; - }; - - struct mlx5_ib_port_resources { ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -994,19 +994,27 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ - if (!dev->cache.wq) - return 0; - -- cancel_delayed_work_sync(&dev->cache.remove_ent_dwork); - mutex_lock(&dev->cache.rb_lock); -+ dev->cache.disable = true; - for (node = rb_first(root); node; node = rb_next(node)) { - ent = rb_entry(node, struct mlx5_cache_ent, node); - spin_lock_irq(&ent->mkeys_queue.lock); - ent->disabled = true; - spin_unlock_irq(&ent->mkeys_queue.lock); -- cancel_delayed_work_sync(&ent->dwork); - } -+ mutex_unlock(&dev->cache.rb_lock); -+ -+ /* -+ * After all entries are disabled and will not reschedule on WQ, -+ * flush it and all async commands. -+ */ -+ flush_workqueue(dev->cache.wq); - - mlx5_mkey_cache_debugfs_cleanup(dev); - mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); - -+ /* At this point all entries are disabled and have no concurrent work. 
*/ -+ mutex_lock(&dev->cache.rb_lock); - node = rb_first(root); - while (node) { - ent = rb_entry(node, struct mlx5_cache_ent, node); -@@ -1789,6 +1797,10 @@ static int cache_ent_find_and_store(stru - } - - mutex_lock(&cache->rb_lock); -+ if (cache->disable) { -+ mutex_unlock(&cache->rb_lock); -+ return 0; -+ } - ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key); - if (ent) { - if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) { diff --git a/queue-6.1/rdma-mlx5-fix-mkey-cache-wq-flush.patch b/queue-6.1/rdma-mlx5-fix-mkey-cache-wq-flush.patch deleted file mode 100644 index c28bcf45ab..0000000000 --- a/queue-6.1/rdma-mlx5-fix-mkey-cache-wq-flush.patch +++ /dev/null @@ -1,44 +0,0 @@ -From a53e215f90079f617360439b1b6284820731e34c Mon Sep 17 00:00:00 2001 -From: Moshe Shemesh -Date: Wed, 25 Oct 2023 20:49:59 +0300 -Subject: RDMA/mlx5: Fix mkey cache WQ flush - -From: Moshe Shemesh - -commit a53e215f90079f617360439b1b6284820731e34c upstream. - -The cited patch tries to ensure no pending works on the mkey cache -workqueue by disabling adding new works and call flush_workqueue(). -But this workqueue also has delayed works which might still be pending -the delay time to be queued. - -Add cancel_delayed_work() for the delayed works which waits to be queued -and then the flush_workqueue() will flush all works which are already -queued and running. - -Fixes: 374012b00457 ("RDMA/mlx5: Fix mkey cache possible deadlock on cleanup") -Link: https://lore.kernel.org/r/b8722f14e7ed81452f791764a26d2ed4cfa11478.1698256179.git.leon@kernel.org -Signed-off-by: Moshe Shemesh -Signed-off-by: Leon Romanovsky -Signed-off-by: Jason Gunthorpe -Signed-off-by: Greg Kroah-Hartman ---- - drivers/infiniband/hw/mlx5/mr.c | 2 ++ - 1 file changed, 2 insertions(+) - ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -1007,11 +1007,13 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ - return 0; - - mutex_lock(&dev->cache.rb_lock); -+ cancel_delayed_work(&dev->cache.remove_ent_dwork); - for (node = rb_first(root); node; node = rb_next(node)) { - ent = rb_entry(node, struct mlx5_cache_ent, node); - spin_lock_irq(&ent->mkeys_queue.lock); - ent->disabled = true; - spin_unlock_irq(&ent->mkeys_queue.lock); -+ cancel_delayed_work(&ent->dwork); - } - mutex_unlock(&dev->cache.rb_lock); - diff --git a/queue-6.1/rdma-mlx5-fix-mr-cache-debugfs-error-in-ib-representors-mode.patch b/queue-6.1/rdma-mlx5-fix-mr-cache-debugfs-error-in-ib-representors-mode.patch deleted file mode 100644 index 31fbb7172e..0000000000 --- a/queue-6.1/rdma-mlx5-fix-mr-cache-debugfs-error-in-ib-representors-mode.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 828cf5936bea2438c21a3a6c303b34a2a1f6c3c2 Mon Sep 17 00:00:00 2001 -From: Leon Romanovsky -Date: Thu, 2 Feb 2023 11:03:06 +0200 -Subject: RDMA/mlx5: Fix MR cache debugfs error in IB representors mode - -From: Leon Romanovsky - -commit 828cf5936bea2438c21a3a6c303b34a2a1f6c3c2 upstream. - -Block MR cache debugfs creation for IB representor flow as MR cache shouldn't be used -at all in that mode. As part of this change, add missing debugfs cleanup in error path -too. - -This change fixes the following debugfs errors: - - bond0: (slave enp8s0f1): Enslaving as a backup interface with an up link - mlx5_core 0000:08:00.0: lag map: port 1:1 port 2:1 - mlx5_core 0000:08:00.0: shared_fdb:1 mode:queue_affinity - mlx5_core 0000:08:00.0: Operation mode is single FDB - debugfs: Directory '2' with parent '/' already present! -... - debugfs: Directory '22' with parent '/' already present! 
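Both parts of the fix are small; a condensed sketch of the first (body elided, full hunk below):

/* Skip debugfs setup when there is no debugfs root or the device is an IB
 * representor, where the MR cache is not used at all. */
static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev,
                                            struct mlx5_cache_ent *ent)
{
        if (!mlx5_debugfs_root || dev->is_rep)
                return;
        /* ... create the per-entry "size"/"limit"/"cur"/"miss" files ... */
}

The second part is symmetry on the error path of mlx5_mkey_cache_init(): if creating a cache entry fails, mlx5_mkey_cache_debugfs_cleanup() is now called before warning and returning, so partially created debugfs state is not left behind.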
- -Fixes: 73d09b2fe833 ("RDMA/mlx5: Introduce mlx5r_cache_rb_key") -Signed-off-by: Michael Guralnik -Link: https://lore.kernel.org/r/482a78c54acbcfa1742a0e06a452546428900ffa.1675328463.git.leon@kernel.org -Signed-off-by: Leon Romanovsky -Signed-off-by: Greg Kroah-Hartman ---- - drivers/infiniband/hw/mlx5/mr.c | 4 ++++ - 1 file changed, 4 insertions(+) - ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -789,6 +789,9 @@ static void mlx5_mkey_cache_debugfs_add_ - int order = order_base_2(ent->rb_key.ndescs); - struct dentry *dir; - -+ if (!mlx5_debugfs_root || dev->is_rep) -+ return; -+ - if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) - order = MLX5_IMR_KSM_CACHE_ENTRY + 2; - -@@ -977,6 +980,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_ - - err: - mutex_unlock(&cache->rb_lock); -+ mlx5_mkey_cache_debugfs_cleanup(dev); - mlx5_ib_warn(dev, "failed to create mkey cache entry\n"); - return ret; - } diff --git a/queue-6.1/rdma-mlx5-fix-mr-cache-temp-entries-cleanup.patch b/queue-6.1/rdma-mlx5-fix-mr-cache-temp-entries-cleanup.patch deleted file mode 100644 index 223ad4d579..0000000000 --- a/queue-6.1/rdma-mlx5-fix-mr-cache-temp-entries-cleanup.patch +++ /dev/null @@ -1,214 +0,0 @@ -From 7ebb00cea49db641b458edef0ede389f7004821d Mon Sep 17 00:00:00 2001 -From: Michael Guralnik -Date: Tue, 3 Sep 2024 14:24:50 +0300 -Subject: RDMA/mlx5: Fix MR cache temp entries cleanup - -From: Michael Guralnik - -commit 7ebb00cea49db641b458edef0ede389f7004821d upstream. - -Fix the cleanup of the temp cache entries that are dynamically created -in the MR cache. - -The cleanup of the temp cache entries is currently scheduled only when a -new entry is created. Since in the cleanup of the entries only the mkeys -are destroyed and the cache entry stays in the cache, subsequent -registrations might reuse the entry and it will eventually be filled with -new mkeys without cleanup ever getting scheduled again. - -On workloads that register and deregister MRs with a wide range of -properties we see the cache ends up holding many cache entries, each -holding the max number of mkeys that were ever used through it. - -Additionally, as the cleanup work is scheduled to run over the whole -cache, any mkey that is returned to the cache after the cleanup was -scheduled will be held for less than the intended 30 seconds timeout. - -Solve both issues by dropping the existing remove_ent_work and reusing -the existing per-entry work to also handle the temp entries cleanup. - -Schedule the work to run with a 30 seconds delay every time we push an -mkey to a clean temp entry. -This ensures the cleanup runs on each entry only 30 seconds after the -first mkey was pushed to an empty entry. - -As we have already been distinguishing between persistent and temp entries -when scheduling the cache_work_func, it is not being scheduled in any -other flows for the temp entries. - -Another benefit from moving to a per-entry cleanup is we now not -required to hold the rb_tree mutex, thus enabling other flow to run -concurrently. 
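The heart of the new scheme is the small block added to mlx5_revoke_mr() (its full hunk appears further down); shown here in isolation, with cache_wq standing in for ent->dev->cache.wq:

/* When an mkey is stored into a clean temp entry, arm that entry's own
 * delayed work to purge it 30 seconds later; tmp_cleanup_scheduled keeps
 * the timer from being re-armed on every subsequent push. */
spin_lock_irq(&ent->mkeys_queue.lock);
if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
        mod_delayed_work(cache_wq, &ent->dwork, msecs_to_jiffies(30 * 1000));
        ent->tmp_cleanup_scheduled = true;
}
spin_unlock_irq(&ent->mkeys_queue.lock);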
- -Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow") -Signed-off-by: Michael Guralnik -Link: https://patch.msgid.link/e4fa4bb03bebf20dceae320f26816cd2dde23a26.1725362530.git.leon@kernel.org -Signed-off-by: Leon Romanovsky -Signed-off-by: Greg Kroah-Hartman ---- - drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 - drivers/infiniband/hw/mlx5/mr.c | 85 +++++++++++++---------------------- - 2 files changed, 34 insertions(+), 53 deletions(-) - ---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h -+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h -@@ -790,6 +790,7 @@ struct mlx5_cache_ent { - u8 is_tmp:1; - u8 disabled:1; - u8 fill_to_high_water:1; -+ u8 tmp_cleanup_scheduled:1; - - /* - * - limit is the low water mark for stored mkeys, 2* limit is the -@@ -821,7 +822,6 @@ struct mlx5_mkey_cache { - struct mutex rb_lock; - struct dentry *fs_root; - unsigned long last_add; -- struct delayed_work remove_ent_dwork; - }; - - struct mlx5_ib_port_resources { ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -525,6 +525,23 @@ static void queue_adjust_cache_locked(st - } - } - -+static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) -+{ -+ u32 mkey; -+ -+ cancel_delayed_work(&ent->dwork); -+ spin_lock_irq(&ent->mkeys_queue.lock); -+ while (ent->mkeys_queue.ci) { -+ mkey = pop_mkey_locked(ent); -+ spin_unlock_irq(&ent->mkeys_queue.lock); -+ mlx5_core_destroy_mkey(dev->mdev, mkey); -+ spin_lock_irq(&ent->mkeys_queue.lock); -+ } -+ ent->tmp_cleanup_scheduled = false; -+ spin_unlock_irq(&ent->mkeys_queue.lock); -+} -+ -+ - static void __cache_work_func(struct mlx5_cache_ent *ent) - { - struct mlx5_ib_dev *dev = ent->dev; -@@ -596,7 +613,11 @@ static void delayed_cache_work_func(stru - struct mlx5_cache_ent *ent; - - ent = container_of(work, struct mlx5_cache_ent, dwork.work); -- __cache_work_func(ent); -+ /* temp entries are never filled, only cleaned */ -+ if (ent->is_tmp) -+ clean_keys(ent->dev, ent); -+ else -+ __cache_work_func(ent); - } - - static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1, -@@ -771,21 +792,6 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(s - return _mlx5_mr_cache_alloc(dev, ent, access_flags); - } - --static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) --{ -- u32 mkey; -- -- cancel_delayed_work(&ent->dwork); -- spin_lock_irq(&ent->mkeys_queue.lock); -- while (ent->mkeys_queue.ci) { -- mkey = pop_mkey_locked(ent); -- spin_unlock_irq(&ent->mkeys_queue.lock); -- mlx5_core_destroy_mkey(dev->mdev, mkey); -- spin_lock_irq(&ent->mkeys_queue.lock); -- } -- spin_unlock_irq(&ent->mkeys_queue.lock); --} -- - static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) - { - if (!mlx5_debugfs_root || dev->is_rep) -@@ -898,10 +904,6 @@ mlx5r_cache_create_ent_locked(struct mlx - ent->limit = 0; - - mlx5_mkey_cache_debugfs_add_ent(dev, ent); -- } else { -- mod_delayed_work(ent->dev->cache.wq, -- &ent->dev->cache.remove_ent_dwork, -- msecs_to_jiffies(30 * 1000)); - } - - return ent; -@@ -912,35 +914,6 @@ mkeys_err: - return ERR_PTR(ret); - } - --static void remove_ent_work_func(struct work_struct *work) --{ -- struct mlx5_mkey_cache *cache; -- struct mlx5_cache_ent *ent; -- struct rb_node *cur; -- -- cache = container_of(work, struct mlx5_mkey_cache, -- remove_ent_dwork.work); -- mutex_lock(&cache->rb_lock); -- cur = rb_last(&cache->rb_root); -- while (cur) { -- ent = rb_entry(cur, struct mlx5_cache_ent, node); -- cur = rb_prev(cur); -- mutex_unlock(&cache->rb_lock); -- -- 
spin_lock_irq(&ent->mkeys_queue.lock); -- if (!ent->is_tmp) { -- spin_unlock_irq(&ent->mkeys_queue.lock); -- mutex_lock(&cache->rb_lock); -- continue; -- } -- spin_unlock_irq(&ent->mkeys_queue.lock); -- -- clean_keys(ent->dev, ent); -- mutex_lock(&cache->rb_lock); -- } -- mutex_unlock(&cache->rb_lock); --} -- - int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) - { - struct mlx5_mkey_cache *cache = &dev->cache; -@@ -956,7 +929,6 @@ int mlx5_mkey_cache_init(struct mlx5_ib_ - mutex_init(&dev->slow_path_mutex); - mutex_init(&dev->cache.rb_lock); - dev->cache.rb_root = RB_ROOT; -- INIT_DELAYED_WORK(&dev->cache.remove_ent_dwork, remove_ent_work_func); - cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); - if (!cache->wq) { - mlx5_ib_warn(dev, "failed to create work queue\n"); -@@ -1007,7 +979,6 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ - return 0; - - mutex_lock(&dev->cache.rb_lock); -- cancel_delayed_work(&dev->cache.remove_ent_dwork); - for (node = rb_first(root); node; node = rb_next(node)) { - ent = rb_entry(node, struct mlx5_cache_ent, node); - spin_lock_irq(&ent->mkeys_queue.lock); -@@ -1844,8 +1815,18 @@ static int mlx5_revoke_mr(struct mlx5_ib - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; - -- if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) -+ if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) { -+ ent = mr->mmkey.cache_ent; -+ /* upon storing to a clean temp entry - schedule its cleanup */ -+ spin_lock_irq(&ent->mkeys_queue.lock); -+ if (ent->is_tmp && !ent->tmp_cleanup_scheduled) { -+ mod_delayed_work(ent->dev->cache.wq, &ent->dwork, -+ msecs_to_jiffies(30 * 1000)); -+ ent->tmp_cleanup_scheduled = true; -+ } -+ spin_unlock_irq(&ent->mkeys_queue.lock); - return 0; -+ } - - if (ent) { - spin_lock_irq(&ent->mkeys_queue.lock); diff --git a/queue-6.1/rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch b/queue-6.1/rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch deleted file mode 100644 index 401d194fd6..0000000000 --- a/queue-6.1/rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch +++ /dev/null @@ -1,209 +0,0 @@ -From 15ed43c7d41f9929ea55919272003c7ba5aec402 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Sun, 19 Jan 2025 14:36:13 +0200 -Subject: RDMA/mlx5: Fix the recovery flow of the UMR QP - -From: Yishai Hadas - -[ Upstream commit d97505baea64d93538b16baf14ce7b8c1fbad746 ] - -This patch addresses an issue in the recovery flow of the UMR QP, -ensuring tasks do not get stuck, as highlighted by the call trace [1]. - -During recovery, before transitioning the QP to the RESET state, the -software must wait for all outstanding WRs to complete. - -Failing to do so can cause the firmware to skip sending some flushed -CQEs with errors and simply discard them upon the RESET, as per the IB -specification. - -This race condition can result in lost CQEs and tasks becoming stuck. - -To resolve this, the patch sends a final WR which serves only as a -barrier before moving the QP state to RESET. - -Once a CQE is received for that final WR, it guarantees that no -outstanding WRs remain, making it safe to transition the QP to RESET and -subsequently back to RTS, restoring proper functionality. - -Note: -For the barrier WR, we simply reuse the failed and ready WR. -Since the QP is in an error state, it will only receive -IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier we don't -care about its status. 
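Condensed, and with the umrc->lock handling and most error handling trimmed, the sequence described above is:

/* Re-post the failed WR purely as a barrier, wait for its (flush-error)
 * completion so no WR can still be outstanding, then cycle the UMR QP
 * through RESET and back to RTS. */
err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe, wqe, with_data);
if (!err) {
        wait_for_completion(&umr_context->done);
        attr.qp_state = IB_QPS_RESET;
        err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
        if (!err)
                err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
        if (!err)
                umrc->state = MLX5_UMR_STATE_ACTIVE;
}

The full mlx5r_umr_recover() implementation, including the state checks, appears in the diff below.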
- -[1] -INFO: task rdma_resource_l:1922 blocked for more than 120 seconds. -Tainted: G W 6.12.0-rc7+ #1626 -"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. -task:rdma_resource_l state:D stack:0 pid:1922 tgid:1922 ppid:1369 - flags:0x00004004 -Call Trace: - -__schedule+0x420/0xd30 -schedule+0x47/0x130 -schedule_timeout+0x280/0x300 -? mark_held_locks+0x48/0x80 -? lockdep_hardirqs_on_prepare+0xe5/0x1a0 -wait_for_completion+0x75/0x130 -mlx5r_umr_post_send_wait+0x3c2/0x5b0 [mlx5_ib] -? __pfx_mlx5r_umr_done+0x10/0x10 [mlx5_ib] -mlx5r_umr_revoke_mr+0x93/0xc0 [mlx5_ib] -__mlx5_ib_dereg_mr+0x299/0x520 [mlx5_ib] -? _raw_spin_unlock_irq+0x24/0x40 -? wait_for_completion+0xfe/0x130 -? rdma_restrack_put+0x63/0xe0 [ib_core] -ib_dereg_mr_user+0x5f/0x120 [ib_core] -? lock_release+0xc6/0x280 -destroy_hw_idr_uobject+0x1d/0x60 [ib_uverbs] -uverbs_destroy_uobject+0x58/0x1d0 [ib_uverbs] -uobj_destroy+0x3f/0x70 [ib_uverbs] -ib_uverbs_cmd_verbs+0x3e4/0xbb0 [ib_uverbs] -? __pfx_uverbs_destroy_def_handler+0x10/0x10 [ib_uverbs] -? __lock_acquire+0x64e/0x2080 -? mark_held_locks+0x48/0x80 -? find_held_lock+0x2d/0xa0 -? lock_acquire+0xc1/0x2f0 -? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs] -? __fget_files+0xc3/0x1b0 -ib_uverbs_ioctl+0xe7/0x170 [ib_uverbs] -? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs] -__x64_sys_ioctl+0x1b0/0xa70 -do_syscall_64+0x6b/0x140 -entry_SYSCALL_64_after_hwframe+0x76/0x7e -RIP: 0033:0x7f99c918b17b -RSP: 002b:00007ffc766d0468 EFLAGS: 00000246 ORIG_RAX: - 0000000000000010 -RAX: ffffffffffffffda RBX: 00007ffc766d0578 RCX: - 00007f99c918b17b -RDX: 00007ffc766d0560 RSI: 00000000c0181b01 RDI: - 0000000000000003 -RBP: 00007ffc766d0540 R08: 00007f99c8f99010 R09: - 000000000000bd7e -R10: 00007f99c94c1c70 R11: 0000000000000246 R12: - 00007ffc766d0530 -R13: 000000000000001c R14: 0000000040246a80 R15: - 0000000000000000 - - -Fixes: 158e71bb69e3 ("RDMA/mlx5: Add a umr recovery flow") -Signed-off-by: Yishai Hadas -Reviewed-by: Michael Guralnik -Link: https://patch.msgid.link/27b51b92ec42dfb09d8096fcbd51878f397ce6ec.1737290141.git.leon@kernel.org -Signed-off-by: Leon Romanovsky -Signed-off-by: Sasha Levin ---- - drivers/infiniband/hw/mlx5/umr.c | 83 +++++++++++++++++++++----------- - 1 file changed, 56 insertions(+), 27 deletions(-) - -diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c -index fa000182d0b41..1a39e86178ece 100644 ---- a/drivers/infiniband/hw/mlx5/umr.c -+++ b/drivers/infiniband/hw/mlx5/umr.c -@@ -199,30 +199,6 @@ void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev) - ib_dealloc_pd(dev->umrc.pd); - } - --static int mlx5r_umr_recover(struct mlx5_ib_dev *dev) --{ -- struct umr_common *umrc = &dev->umrc; -- struct ib_qp_attr attr; -- int err; -- -- attr.qp_state = IB_QPS_RESET; -- err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); -- if (err) { -- mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); -- goto err; -- } -- -- err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); -- if (err) -- goto err; -- -- umrc->state = MLX5_UMR_STATE_ACTIVE; -- return 0; -- --err: -- umrc->state = MLX5_UMR_STATE_ERR; -- return err; --} - - static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, - struct mlx5r_umr_wqe *wqe, bool with_data) -@@ -270,6 +246,61 @@ static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, - return err; - } - -+static int mlx5r_umr_recover(struct mlx5_ib_dev *dev, u32 mkey, -+ struct mlx5r_umr_context *umr_context, -+ struct mlx5r_umr_wqe *wqe, bool with_data) -+{ -+ struct umr_common *umrc = &dev->umrc; -+ 
struct ib_qp_attr attr; -+ int err; -+ -+ mutex_lock(&umrc->lock); -+ /* Preventing any further WRs to be sent now */ -+ if (umrc->state != MLX5_UMR_STATE_RECOVER) { -+ mlx5_ib_warn(dev, "UMR recovery encountered an unexpected state=%d\n", -+ umrc->state); -+ umrc->state = MLX5_UMR_STATE_RECOVER; -+ } -+ mutex_unlock(&umrc->lock); -+ -+ /* Sending a final/barrier WR (the failed one) and wait for its completion. -+ * This will ensure that all the previous WRs got a completion before -+ * we set the QP state to RESET. -+ */ -+ err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe, wqe, -+ with_data); -+ if (err) { -+ mlx5_ib_warn(dev, "UMR recovery post send failed, err %d\n", err); -+ goto err; -+ } -+ -+ /* Since the QP is in an error state, it will only receive -+ * IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier -+ * we don't care about its status. -+ */ -+ wait_for_completion(&umr_context->done); -+ -+ attr.qp_state = IB_QPS_RESET; -+ err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); -+ if (err) { -+ mlx5_ib_warn(dev, "Couldn't modify UMR QP to RESET, err=%d\n", err); -+ goto err; -+ } -+ -+ err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); -+ if (err) { -+ mlx5_ib_warn(dev, "Couldn't modify UMR QP to RTS, err=%d\n", err); -+ goto err; -+ } -+ -+ umrc->state = MLX5_UMR_STATE_ACTIVE; -+ return 0; -+ -+err: -+ umrc->state = MLX5_UMR_STATE_ERR; -+ return err; -+} -+ - static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc) - { - struct mlx5_ib_umr_context *context = -@@ -334,9 +365,7 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey, - mlx5_ib_warn(dev, - "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n", - umr_context.status, mkey); -- mutex_lock(&umrc->lock); -- err = mlx5r_umr_recover(dev); -- mutex_unlock(&umrc->lock); -+ err = mlx5r_umr_recover(dev, mkey, &umr_context, wqe, with_data); - if (err) - mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n", - err); --- -2.39.5 - diff --git a/queue-6.1/rdma-mlx5-follow-rb_key.ats-when-creating-new-mkeys.patch b/queue-6.1/rdma-mlx5-follow-rb_key.ats-when-creating-new-mkeys.patch deleted file mode 100644 index c513393fce..0000000000 --- a/queue-6.1/rdma-mlx5-follow-rb_key.ats-when-creating-new-mkeys.patch +++ /dev/null @@ -1,37 +0,0 @@ -From f637040c3339a2ed8c12d65ad03f9552386e2fe7 Mon Sep 17 00:00:00 2001 -From: Jason Gunthorpe -Date: Tue, 28 May 2024 15:52:53 +0300 -Subject: RDMA/mlx5: Follow rb_key.ats when creating new mkeys - -From: Jason Gunthorpe - -commit f637040c3339a2ed8c12d65ad03f9552386e2fe7 upstream. - -When a cache ent already exists but doesn't have any mkeys in it the cache -will automatically create a new one based on the specification in the -ent->rb_key. - -ent->ats was missed when creating the new key and so ma_translation_mode -was not being set even though the ent requires it. 
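In set_cache_mkc() terms, the refill path has to mirror every rb_key property into the new mkey context; the missed line sits next to the existing access-mode fields (full hunk below):

MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2, (ent->rb_key.access_mode >> 2) & 0x7);
MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats);  /* previously missing */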
- -Cc: stable@vger.kernel.org -Fixes: 73d09b2fe833 ("RDMA/mlx5: Introduce mlx5r_cache_rb_key") -Signed-off-by: Jason Gunthorpe -Reviewed-by: Michael Guralnik -Link: https://lore.kernel.org/r/7c5613458ecb89fbe5606b7aa4c8d990bdea5b9a.1716900410.git.leon@kernel.org -Signed-off-by: Leon Romanovsky -Signed-off-by: Greg Kroah-Hartman ---- - drivers/infiniband/hw/mlx5/mr.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -243,6 +243,7 @@ static void set_cache_mkc(struct mlx5_ca - MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3); - MLX5_SET(mkc, mkc, access_mode_4_2, - (ent->rb_key.access_mode >> 2) & 0x7); -+ MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats); - - MLX5_SET(mkc, mkc, translations_octword_size, - get_mkc_octo_size(ent->rb_key.access_mode, diff --git a/queue-6.1/rdma-mlx5-implement-mkeys-management-via-lifo-queue.patch b/queue-6.1/rdma-mlx5-implement-mkeys-management-via-lifo-queue.patch deleted file mode 100644 index c89fbf4410..0000000000 --- a/queue-6.1/rdma-mlx5-implement-mkeys-management-via-lifo-queue.patch +++ /dev/null @@ -1,704 +0,0 @@ -From 73daa66bd410fa9662f7e4578ac5b58338c23b31 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 21 Sep 2023 11:07:16 +0300 -Subject: RDMA/mlx5: Implement mkeys management via LIFO queue - -From: Shay Drory - -[ Upstream commit 57e7071683ef6148c9f5ea0ba84598d2ba681375 ] - -Currently, mkeys are managed via xarray. This implementation leads to -a degradation in cases many MRs are unregistered in parallel, due to xarray -internal implementation, for example: deregistration 1M MRs via 64 threads -is taking ~15% more time[1]. - -Hence, implement mkeys management via LIFO queue, which solved the -degradation. 
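To make the data-structure change concrete, here is a toy sketch of the idea — not the driver code, with simplified constants, and with page allocation, emptiness checks and locking omitted: mkeys live in fixed-size arrays chained on a list, and push/pop only ever touch the tail page, so the hot path stays O(1) with no xarray node allocation or bookkeeping.

#define TOY_MKEYS_PER_PAGE 64

struct toy_mkeys_page {
        u32 mkeys[TOY_MKEYS_PER_PAGE];
        struct list_head list;
};

struct toy_mkeys_queue {
        struct list_head pages_list;
        unsigned long ci;               /* number of stored mkeys */
};

/* Callers are expected to serialize push/pop, as the driver does with a
 * per-entry spinlock. */
static void toy_push(struct toy_mkeys_queue *q, u32 mkey)
{
        struct toy_mkeys_page *p = list_last_entry(&q->pages_list,
                                                   struct toy_mkeys_page, list);

        p->mkeys[q->ci % TOY_MKEYS_PER_PAGE] = mkey;
        q->ci++;
}

static u32 toy_pop(struct toy_mkeys_queue *q)
{
        struct toy_mkeys_page *p = list_last_entry(&q->pages_list,
                                                   struct toy_mkeys_page, list);

        q->ci--;
        return p->mkeys[q->ci % TOY_MKEYS_PER_PAGE];
}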
- -[1] -2.8us in kernel v5.19 compare to 3.2us in kernel v6.4 - -Signed-off-by: Shay Drory -Link: https://lore.kernel.org/r/fde3d4cfab0f32f0ccb231cd113298256e1502c5.1695283384.git.leon@kernel.org -Signed-off-by: Leon Romanovsky -Stable-dep-of: d97505baea64 ("RDMA/mlx5: Fix the recovery flow of the UMR QP") -Signed-off-by: Sasha Levin ---- - drivers/infiniband/hw/mlx5/mlx5_ib.h | 21 +- - drivers/infiniband/hw/mlx5/mr.c | 324 ++++++++++++--------------- - drivers/infiniband/hw/mlx5/umr.c | 4 +- - 3 files changed, 169 insertions(+), 180 deletions(-) - -diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h -index 7c72e0e9db54a..024d2071c6a5d 100644 ---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h -+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h -@@ -760,10 +760,25 @@ struct umr_common { - unsigned int state; - }; - -+#define NUM_MKEYS_PER_PAGE \ -+ ((PAGE_SIZE - sizeof(struct list_head)) / sizeof(u32)) -+ -+struct mlx5_mkeys_page { -+ u32 mkeys[NUM_MKEYS_PER_PAGE]; -+ struct list_head list; -+}; -+static_assert(sizeof(struct mlx5_mkeys_page) == PAGE_SIZE); -+ -+struct mlx5_mkeys_queue { -+ struct list_head pages_list; -+ u32 num_pages; -+ unsigned long ci; -+ spinlock_t lock; /* sync list ops */ -+}; -+ - struct mlx5_cache_ent { -- struct xarray mkeys; -- unsigned long stored; -- unsigned long reserved; -+ struct mlx5_mkeys_queue mkeys_queue; -+ u32 pending; - - char name[4]; - -diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c -index 2c1a935734273..b66b8346c2dc6 100644 ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -140,110 +140,47 @@ static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out) - mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out); - } - --static int push_mkey_locked(struct mlx5_cache_ent *ent, bool limit_pendings, -- void *to_store) -+static int push_mkey_locked(struct mlx5_cache_ent *ent, u32 mkey) - { -- XA_STATE(xas, &ent->mkeys, 0); -- void *curr; -+ unsigned long tmp = ent->mkeys_queue.ci % NUM_MKEYS_PER_PAGE; -+ struct mlx5_mkeys_page *page; - -- if (limit_pendings && -- (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) -- return -EAGAIN; -- -- while (1) { -- /* -- * This is cmpxchg (NULL, XA_ZERO_ENTRY) however this version -- * doesn't transparently unlock. Instead we set the xas index to -- * the current value of reserved every iteration. -- */ -- xas_set(&xas, ent->reserved); -- curr = xas_load(&xas); -- if (!curr) { -- if (to_store && ent->stored == ent->reserved) -- xas_store(&xas, to_store); -- else -- xas_store(&xas, XA_ZERO_ENTRY); -- if (xas_valid(&xas)) { -- ent->reserved++; -- if (to_store) { -- if (ent->stored != ent->reserved) -- __xa_store(&ent->mkeys, -- ent->stored, -- to_store, -- GFP_KERNEL); -- ent->stored++; -- queue_adjust_cache_locked(ent); -- WRITE_ONCE(ent->dev->cache.last_add, -- jiffies); -- } -- } -- } -- xa_unlock_irq(&ent->mkeys); -- -- /* -- * Notice xas_nomem() must always be called as it cleans -- * up any cached allocation. 
-- */ -- if (!xas_nomem(&xas, GFP_KERNEL)) -- break; -- xa_lock_irq(&ent->mkeys); -+ lockdep_assert_held(&ent->mkeys_queue.lock); -+ if (ent->mkeys_queue.ci >= -+ ent->mkeys_queue.num_pages * NUM_MKEYS_PER_PAGE) { -+ page = kzalloc(sizeof(*page), GFP_ATOMIC); -+ if (!page) -+ return -ENOMEM; -+ ent->mkeys_queue.num_pages++; -+ list_add_tail(&page->list, &ent->mkeys_queue.pages_list); -+ } else { -+ page = list_last_entry(&ent->mkeys_queue.pages_list, -+ struct mlx5_mkeys_page, list); - } -- xa_lock_irq(&ent->mkeys); -- if (xas_error(&xas)) -- return xas_error(&xas); -- if (WARN_ON(curr)) -- return -EINVAL; -- return 0; --} -- --static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings, -- void *to_store) --{ -- int ret; -- -- xa_lock_irq(&ent->mkeys); -- ret = push_mkey_locked(ent, limit_pendings, to_store); -- xa_unlock_irq(&ent->mkeys); -- return ret; --} -- --static void undo_push_reserve_mkey(struct mlx5_cache_ent *ent) --{ -- void *old; -- -- ent->reserved--; -- old = __xa_erase(&ent->mkeys, ent->reserved); -- WARN_ON(old); --} -- --static void push_to_reserved(struct mlx5_cache_ent *ent, u32 mkey) --{ -- void *old; - -- old = __xa_store(&ent->mkeys, ent->stored, xa_mk_value(mkey), 0); -- WARN_ON(old); -- ent->stored++; -+ page->mkeys[tmp] = mkey; -+ ent->mkeys_queue.ci++; -+ return 0; - } - --static u32 pop_stored_mkey(struct mlx5_cache_ent *ent) -+static int pop_mkey_locked(struct mlx5_cache_ent *ent) - { -- void *old, *xa_mkey; -- -- ent->stored--; -- ent->reserved--; -+ unsigned long tmp = (ent->mkeys_queue.ci - 1) % NUM_MKEYS_PER_PAGE; -+ struct mlx5_mkeys_page *last_page; -+ u32 mkey; - -- if (ent->stored == ent->reserved) { -- xa_mkey = __xa_erase(&ent->mkeys, ent->stored); -- WARN_ON(!xa_mkey); -- return (u32)xa_to_value(xa_mkey); -+ lockdep_assert_held(&ent->mkeys_queue.lock); -+ last_page = list_last_entry(&ent->mkeys_queue.pages_list, -+ struct mlx5_mkeys_page, list); -+ mkey = last_page->mkeys[tmp]; -+ last_page->mkeys[tmp] = 0; -+ ent->mkeys_queue.ci--; -+ if (ent->mkeys_queue.num_pages > 1 && !tmp) { -+ list_del(&last_page->list); -+ ent->mkeys_queue.num_pages--; -+ kfree(last_page); - } -- -- xa_mkey = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY, -- GFP_KERNEL); -- WARN_ON(!xa_mkey || xa_is_err(xa_mkey)); -- old = __xa_erase(&ent->mkeys, ent->reserved); -- WARN_ON(old); -- return (u32)xa_to_value(xa_mkey); -+ return mkey; - } - - static void create_mkey_callback(int status, struct mlx5_async_work *context) -@@ -257,10 +194,10 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) - if (status) { - create_mkey_warn(dev, status, mkey_out->out); - kfree(mkey_out); -- xa_lock_irqsave(&ent->mkeys, flags); -- undo_push_reserve_mkey(ent); -+ spin_lock_irqsave(&ent->mkeys_queue.lock, flags); -+ ent->pending--; - WRITE_ONCE(dev->fill_delay, 1); -- xa_unlock_irqrestore(&ent->mkeys, flags); -+ spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags); - mod_timer(&dev->delay_timer, jiffies + HZ); - return; - } -@@ -269,11 +206,12 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) - MLX5_GET(create_mkey_out, mkey_out->out, mkey_index)); - WRITE_ONCE(dev->cache.last_add, jiffies); - -- xa_lock_irqsave(&ent->mkeys, flags); -- push_to_reserved(ent, mkey_out->mkey); -+ spin_lock_irqsave(&ent->mkeys_queue.lock, flags); -+ push_mkey_locked(ent, mkey_out->mkey); - /* If we are doing fill_to_high_water then keep going. 
*/ - queue_adjust_cache_locked(ent); -- xa_unlock_irqrestore(&ent->mkeys, flags); -+ ent->pending--; -+ spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags); - kfree(mkey_out); - } - -@@ -329,24 +267,28 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) - set_cache_mkc(ent, mkc); - async_create->ent = ent; - -- err = push_mkey(ent, true, NULL); -- if (err) -+ spin_lock_irq(&ent->mkeys_queue.lock); -+ if (ent->pending >= MAX_PENDING_REG_MR) { -+ err = -EAGAIN; - goto free_async_create; -+ } -+ ent->pending++; -+ spin_unlock_irq(&ent->mkeys_queue.lock); - - err = mlx5_ib_create_mkey_cb(async_create); - if (err) { - mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err); -- goto err_undo_reserve; -+ goto err_create_mkey; - } - } - - return 0; - --err_undo_reserve: -- xa_lock_irq(&ent->mkeys); -- undo_push_reserve_mkey(ent); -- xa_unlock_irq(&ent->mkeys); -+err_create_mkey: -+ spin_lock_irq(&ent->mkeys_queue.lock); -+ ent->pending--; - free_async_create: -+ spin_unlock_irq(&ent->mkeys_queue.lock); - kfree(async_create); - return err; - } -@@ -379,36 +321,36 @@ static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) - { - u32 mkey; - -- lockdep_assert_held(&ent->mkeys.xa_lock); -- if (!ent->stored) -+ lockdep_assert_held(&ent->mkeys_queue.lock); -+ if (!ent->mkeys_queue.ci) - return; -- mkey = pop_stored_mkey(ent); -- xa_unlock_irq(&ent->mkeys); -+ mkey = pop_mkey_locked(ent); -+ spin_unlock_irq(&ent->mkeys_queue.lock); - mlx5_core_destroy_mkey(ent->dev->mdev, mkey); -- xa_lock_irq(&ent->mkeys); -+ spin_lock_irq(&ent->mkeys_queue.lock); - } - - static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target, - bool limit_fill) -- __acquires(&ent->mkeys) __releases(&ent->mkeys) -+ __acquires(&ent->mkeys_queue.lock) __releases(&ent->mkeys_queue.lock) - { - int err; - -- lockdep_assert_held(&ent->mkeys.xa_lock); -+ lockdep_assert_held(&ent->mkeys_queue.lock); - - while (true) { - if (limit_fill) - target = ent->limit * 2; -- if (target == ent->reserved) -+ if (target == ent->pending + ent->mkeys_queue.ci) - return 0; -- if (target > ent->reserved) { -- u32 todo = target - ent->reserved; -+ if (target > ent->pending + ent->mkeys_queue.ci) { -+ u32 todo = target - (ent->pending + ent->mkeys_queue.ci); - -- xa_unlock_irq(&ent->mkeys); -+ spin_unlock_irq(&ent->mkeys_queue.lock); - err = add_keys(ent, todo); - if (err == -EAGAIN) - usleep_range(3000, 5000); -- xa_lock_irq(&ent->mkeys); -+ spin_lock_irq(&ent->mkeys_queue.lock); - if (err) { - if (err != -EAGAIN) - return err; -@@ -436,7 +378,7 @@ static ssize_t size_write(struct file *filp, const char __user *buf, - * cannot free MRs that are in use. Compute the target value for stored - * mkeys. 
- */ -- xa_lock_irq(&ent->mkeys); -+ spin_lock_irq(&ent->mkeys_queue.lock); - if (target < ent->in_use) { - err = -EINVAL; - goto err_unlock; -@@ -449,12 +391,12 @@ static ssize_t size_write(struct file *filp, const char __user *buf, - err = resize_available_mrs(ent, target, false); - if (err) - goto err_unlock; -- xa_unlock_irq(&ent->mkeys); -+ spin_unlock_irq(&ent->mkeys_queue.lock); - - return count; - - err_unlock: -- xa_unlock_irq(&ent->mkeys); -+ spin_unlock_irq(&ent->mkeys_queue.lock); - return err; - } - -@@ -465,7 +407,8 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count, - char lbuf[20]; - int err; - -- err = snprintf(lbuf, sizeof(lbuf), "%ld\n", ent->stored + ent->in_use); -+ err = snprintf(lbuf, sizeof(lbuf), "%ld\n", -+ ent->mkeys_queue.ci + ent->in_use); - if (err < 0) - return err; - -@@ -494,10 +437,10 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, - * Upon set we immediately fill the cache to high water mark implied by - * the limit. - */ -- xa_lock_irq(&ent->mkeys); -+ spin_lock_irq(&ent->mkeys_queue.lock); - ent->limit = var; - err = resize_available_mrs(ent, 0, true); -- xa_unlock_irq(&ent->mkeys); -+ spin_unlock_irq(&ent->mkeys_queue.lock); - if (err) - return err; - return count; -@@ -533,9 +476,9 @@ static bool someone_adding(struct mlx5_mkey_cache *cache) - mutex_lock(&cache->rb_lock); - for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) { - ent = rb_entry(node, struct mlx5_cache_ent, node); -- xa_lock_irq(&ent->mkeys); -- ret = ent->stored < ent->limit; -- xa_unlock_irq(&ent->mkeys); -+ spin_lock_irq(&ent->mkeys_queue.lock); -+ ret = ent->mkeys_queue.ci < ent->limit; -+ spin_unlock_irq(&ent->mkeys_queue.lock); - if (ret) { - mutex_unlock(&cache->rb_lock); - return true; -@@ -552,26 +495,26 @@ static bool someone_adding(struct mlx5_mkey_cache *cache) - */ - static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) - { -- lockdep_assert_held(&ent->mkeys.xa_lock); -+ lockdep_assert_held(&ent->mkeys_queue.lock); - - if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp) - return; -- if (ent->stored < ent->limit) { -+ if (ent->mkeys_queue.ci < ent->limit) { - ent->fill_to_high_water = true; - mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); - } else if (ent->fill_to_high_water && -- ent->reserved < 2 * ent->limit) { -+ ent->mkeys_queue.ci + ent->pending < 2 * ent->limit) { - /* - * Once we start populating due to hitting a low water mark - * continue until we pass the high water mark. 
- */ - mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); -- } else if (ent->stored == 2 * ent->limit) { -+ } else if (ent->mkeys_queue.ci == 2 * ent->limit) { - ent->fill_to_high_water = false; -- } else if (ent->stored > 2 * ent->limit) { -+ } else if (ent->mkeys_queue.ci > 2 * ent->limit) { - /* Queue deletion of excess entries */ - ent->fill_to_high_water = false; -- if (ent->stored != ent->reserved) -+ if (ent->pending) - queue_delayed_work(ent->dev->cache.wq, &ent->dwork, - msecs_to_jiffies(1000)); - else -@@ -585,15 +528,16 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) - struct mlx5_mkey_cache *cache = &dev->cache; - int err; - -- xa_lock_irq(&ent->mkeys); -+ spin_lock_irq(&ent->mkeys_queue.lock); - if (ent->disabled) - goto out; - -- if (ent->fill_to_high_water && ent->reserved < 2 * ent->limit && -+ if (ent->fill_to_high_water && -+ ent->mkeys_queue.ci + ent->pending < 2 * ent->limit && - !READ_ONCE(dev->fill_delay)) { -- xa_unlock_irq(&ent->mkeys); -+ spin_unlock_irq(&ent->mkeys_queue.lock); - err = add_keys(ent, 1); -- xa_lock_irq(&ent->mkeys); -+ spin_lock_irq(&ent->mkeys_queue.lock); - if (ent->disabled) - goto out; - if (err) { -@@ -611,7 +555,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) - msecs_to_jiffies(1000)); - } - } -- } else if (ent->stored > 2 * ent->limit) { -+ } else if (ent->mkeys_queue.ci > 2 * ent->limit) { - bool need_delay; - - /* -@@ -626,11 +570,11 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) - * the garbage collection work to try to run in next cycle, in - * order to free CPU resources to other tasks. - */ -- xa_unlock_irq(&ent->mkeys); -+ spin_unlock_irq(&ent->mkeys_queue.lock); - need_delay = need_resched() || someone_adding(cache) || - !time_after(jiffies, - READ_ONCE(cache->last_add) + 300 * HZ); -- xa_lock_irq(&ent->mkeys); -+ spin_lock_irq(&ent->mkeys_queue.lock); - if (ent->disabled) - goto out; - if (need_delay) { -@@ -641,7 +585,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) - queue_adjust_cache_locked(ent); - } - out: -- xa_unlock_irq(&ent->mkeys); -+ spin_unlock_irq(&ent->mkeys_queue.lock); - } - - static void delayed_cache_work_func(struct work_struct *work) -@@ -749,25 +693,25 @@ static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - if (!mr) - return ERR_PTR(-ENOMEM); - -- xa_lock_irq(&ent->mkeys); -+ spin_lock_irq(&ent->mkeys_queue.lock); - ent->in_use++; - -- if (!ent->stored) { -+ if (!ent->mkeys_queue.ci) { - queue_adjust_cache_locked(ent); - ent->miss++; -- xa_unlock_irq(&ent->mkeys); -+ spin_unlock_irq(&ent->mkeys_queue.lock); - err = create_cache_mkey(ent, &mr->mmkey.key); - if (err) { -- xa_lock_irq(&ent->mkeys); -+ spin_lock_irq(&ent->mkeys_queue.lock); - ent->in_use--; -- xa_unlock_irq(&ent->mkeys); -+ spin_unlock_irq(&ent->mkeys_queue.lock); - kfree(mr); - return ERR_PTR(err); - } - } else { -- mr->mmkey.key = pop_stored_mkey(ent); -+ mr->mmkey.key = pop_mkey_locked(ent); - queue_adjust_cache_locked(ent); -- xa_unlock_irq(&ent->mkeys); -+ spin_unlock_irq(&ent->mkeys_queue.lock); - } - mr->mmkey.cache_ent = ent; - mr->mmkey.type = MLX5_MKEY_MR; -@@ -820,14 +764,14 @@ static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) - u32 mkey; - - cancel_delayed_work(&ent->dwork); -- xa_lock_irq(&ent->mkeys); -- while (ent->stored) { -- mkey = pop_stored_mkey(ent); -- xa_unlock_irq(&ent->mkeys); -+ spin_lock_irq(&ent->mkeys_queue.lock); -+ while (ent->mkeys_queue.ci) { -+ mkey = pop_mkey_locked(ent); -+ 
spin_unlock_irq(&ent->mkeys_queue.lock); - mlx5_core_destroy_mkey(dev->mdev, mkey); -- xa_lock_irq(&ent->mkeys); -+ spin_lock_irq(&ent->mkeys_queue.lock); - } -- xa_unlock_irq(&ent->mkeys); -+ spin_unlock_irq(&ent->mkeys_queue.lock); - } - - static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) -@@ -852,7 +796,7 @@ static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev, - dir = debugfs_create_dir(ent->name, dev->cache.fs_root); - debugfs_create_file("size", 0600, dir, ent, &size_fops); - debugfs_create_file("limit", 0600, dir, ent, &limit_fops); -- debugfs_create_ulong("cur", 0400, dir, &ent->stored); -+ debugfs_create_ulong("cur", 0400, dir, &ent->mkeys_queue.ci); - debugfs_create_u32("miss", 0600, dir, &ent->miss); - } - -@@ -874,6 +818,31 @@ static void delay_time_func(struct timer_list *t) - WRITE_ONCE(dev->fill_delay, 0); - } - -+static int mlx5r_mkeys_init(struct mlx5_cache_ent *ent) -+{ -+ struct mlx5_mkeys_page *page; -+ -+ page = kzalloc(sizeof(*page), GFP_KERNEL); -+ if (!page) -+ return -ENOMEM; -+ INIT_LIST_HEAD(&ent->mkeys_queue.pages_list); -+ spin_lock_init(&ent->mkeys_queue.lock); -+ list_add_tail(&page->list, &ent->mkeys_queue.pages_list); -+ ent->mkeys_queue.num_pages++; -+ return 0; -+} -+ -+static void mlx5r_mkeys_uninit(struct mlx5_cache_ent *ent) -+{ -+ struct mlx5_mkeys_page *page; -+ -+ WARN_ON(ent->mkeys_queue.ci || ent->mkeys_queue.num_pages > 1); -+ page = list_last_entry(&ent->mkeys_queue.pages_list, -+ struct mlx5_mkeys_page, list); -+ list_del(&page->list); -+ kfree(page); -+} -+ - struct mlx5_cache_ent * - mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev, - struct mlx5r_cache_rb_key rb_key, -@@ -887,7 +856,9 @@ mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev, - if (!ent) - return ERR_PTR(-ENOMEM); - -- xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ); -+ ret = mlx5r_mkeys_init(ent); -+ if (ret) -+ goto mkeys_err; - ent->rb_key = rb_key; - ent->dev = dev; - ent->is_tmp = !persistent_entry; -@@ -895,10 +866,8 @@ mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev, - INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); - - ret = mlx5_cache_ent_insert(&dev->cache, ent); -- if (ret) { -- kfree(ent); -- return ERR_PTR(ret); -- } -+ if (ret) -+ goto ent_insert_err; - - if (persistent_entry) { - if (rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) -@@ -921,6 +890,11 @@ mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev, - } - - return ent; -+ent_insert_err: -+ mlx5r_mkeys_uninit(ent); -+mkeys_err: -+ kfree(ent); -+ return ERR_PTR(ret); - } - - static void remove_ent_work_func(struct work_struct *work) -@@ -938,13 +912,13 @@ static void remove_ent_work_func(struct work_struct *work) - cur = rb_prev(cur); - mutex_unlock(&cache->rb_lock); - -- xa_lock_irq(&ent->mkeys); -+ spin_lock_irq(&ent->mkeys_queue.lock); - if (!ent->is_tmp) { -- xa_unlock_irq(&ent->mkeys); -+ spin_unlock_irq(&ent->mkeys_queue.lock); - mutex_lock(&cache->rb_lock); - continue; - } -- xa_unlock_irq(&ent->mkeys); -+ spin_unlock_irq(&ent->mkeys_queue.lock); - - clean_keys(ent->dev, ent); - mutex_lock(&cache->rb_lock); -@@ -994,9 +968,9 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) - mutex_unlock(&cache->rb_lock); - for (node = rb_first(root); node; node = rb_next(node)) { - ent = rb_entry(node, struct mlx5_cache_ent, node); -- xa_lock_irq(&ent->mkeys); -+ spin_lock_irq(&ent->mkeys_queue.lock); - queue_adjust_cache_locked(ent); -- xa_unlock_irq(&ent->mkeys); -+ spin_unlock_irq(&ent->mkeys_queue.lock); - } - - return 0; -@@ -1020,9 +994,9 @@ int 
mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) - mutex_lock(&dev->cache.rb_lock); - for (node = rb_first(root); node; node = rb_next(node)) { - ent = rb_entry(node, struct mlx5_cache_ent, node); -- xa_lock_irq(&ent->mkeys); -+ spin_lock_irq(&ent->mkeys_queue.lock); - ent->disabled = true; -- xa_unlock_irq(&ent->mkeys); -+ spin_unlock_irq(&ent->mkeys_queue.lock); - cancel_delayed_work_sync(&ent->dwork); - } - -@@ -1035,6 +1009,7 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) - node = rb_next(node); - clean_keys(dev, ent); - rb_erase(&ent->node, root); -+ mlx5r_mkeys_uninit(ent); - kfree(ent); - } - mutex_unlock(&dev->cache.rb_lock); -@@ -1802,7 +1777,7 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, - int ret; - - if (mr->mmkey.cache_ent) { -- xa_lock_irq(&mr->mmkey.cache_ent->mkeys); -+ spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); - mr->mmkey.cache_ent->in_use--; - goto end; - } -@@ -1816,7 +1791,7 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, - return -EOPNOTSUPP; - } - mr->mmkey.cache_ent = ent; -- xa_lock_irq(&mr->mmkey.cache_ent->mkeys); -+ spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); - mutex_unlock(&cache->rb_lock); - goto end; - } -@@ -1828,12 +1803,11 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, - return PTR_ERR(ent); - - mr->mmkey.cache_ent = ent; -- xa_lock_irq(&mr->mmkey.cache_ent->mkeys); -+ spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); - - end: -- ret = push_mkey_locked(mr->mmkey.cache_ent, false, -- xa_mk_value(mr->mmkey.key)); -- xa_unlock_irq(&mr->mmkey.cache_ent->mkeys); -+ ret = push_mkey_locked(mr->mmkey.cache_ent, mr->mmkey.key); -+ spin_unlock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); - return ret; - } - -diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c -index cb5cee3dee2b6..fa000182d0b41 100644 ---- a/drivers/infiniband/hw/mlx5/umr.c -+++ b/drivers/infiniband/hw/mlx5/umr.c -@@ -332,8 +332,8 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey, - - WARN_ON_ONCE(1); - mlx5_ib_warn(dev, -- "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs\n", -- umr_context.status); -+ "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n", -+ umr_context.status, mkey); - mutex_lock(&umrc->lock); - err = mlx5r_umr_recover(dev); - mutex_unlock(&umrc->lock); --- -2.39.5 - diff --git a/queue-6.1/rdma-mlx5-introduce-mlx5r_cache_rb_key.patch b/queue-6.1/rdma-mlx5-introduce-mlx5r_cache_rb_key.patch deleted file mode 100644 index 21bcc7561d..0000000000 --- a/queue-6.1/rdma-mlx5-introduce-mlx5r_cache_rb_key.patch +++ /dev/null @@ -1,565 +0,0 @@ -From dee0c2d2ab0dbb79d87e227f8b4136f1764cefb4 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 26 Jan 2023 00:28:05 +0200 -Subject: RDMA/mlx5: Introduce mlx5r_cache_rb_key - -From: Michael Guralnik - -[ Upstream commit 73d09b2fe8336f5f37935e46418666ddbcd3c343 ] - -Switch from using the mkey order to using the new struct as the key to the -RB tree of cache entries. - -The key is all the mkey properties that UMR operations can't modify. -Using this key to define the cache entries and to search and create cache -mkeys. 
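As a sketch of how such a key might be assembled at registration time (illustrative only; the .ats derivation and local variable names are not the driver's exact code):

struct mlx5r_cache_rb_key rb_key = {
        .access_mode = MLX5_MKC_ACCESS_MODE_MTT,
        .access_flags = access_flags,
        .ndescs = ndescs,
        /* .ats comes from the umem/device ATS capability check, elided here */
};

ent = mkey_cache_ent_from_rb_key(dev, rb_key);
/* Exact match on ats/access_mode/access_flags, closest match on ndescs;
 * if no entry fits, a new one can be created from the same key. */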
- -Link: https://lore.kernel.org/r/20230125222807.6921-5-michaelgur@nvidia.com -Signed-off-by: Michael Guralnik -Signed-off-by: Jason Gunthorpe -Stable-dep-of: d97505baea64 ("RDMA/mlx5: Fix the recovery flow of the UMR QP") -Signed-off-by: Sasha Levin ---- - drivers/infiniband/hw/mlx5/mlx5_ib.h | 27 ++-- - drivers/infiniband/hw/mlx5/mr.c | 228 +++++++++++++++++++-------- - drivers/infiniband/hw/mlx5/odp.c | 30 ++-- - 3 files changed, 201 insertions(+), 84 deletions(-) - -diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h -index bd998ac8c29c1..7c9d5648947e9 100644 ---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h -+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h -@@ -637,6 +637,13 @@ enum mlx5_mkey_type { - MLX5_MKEY_INDIRECT_DEVX, - }; - -+struct mlx5r_cache_rb_key { -+ u8 ats:1; -+ unsigned int access_mode; -+ unsigned int access_flags; -+ unsigned int ndescs; -+}; -+ - struct mlx5_ib_mkey { - u32 key; - enum mlx5_mkey_type type; -@@ -757,11 +764,9 @@ struct mlx5_cache_ent { - unsigned long reserved; - - char name[4]; -- u32 order; -- u32 access_mode; -- unsigned int ndescs; - - struct rb_node node; -+ struct mlx5r_cache_rb_key rb_key; - - u8 disabled:1; - u8 fill_to_high_water:1; -@@ -1340,14 +1345,13 @@ int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); - int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev); - int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev); - struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev, -- int order); -+ struct mlx5r_cache_rb_key rb_key, -+ bool persistent_entry); - - struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, -- struct mlx5_cache_ent *ent, -- int access_flags); -+ int access_flags, int access_mode, -+ int ndescs); - --struct mlx5_ib_mr *mlx5_mr_cache_alloc_order(struct mlx5_ib_dev *dev, u32 order, -- int access_flags); - int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, - struct ib_mr_status *mr_status); - struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, -@@ -1370,7 +1374,7 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq); - void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev); - int __init mlx5_ib_odp_init(void); - void mlx5_ib_odp_cleanup(void); --void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent); -+int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev); - void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, - struct mlx5_ib_mr *mr, int flags); - -@@ -1389,7 +1393,10 @@ static inline int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, - static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {} - static inline int mlx5_ib_odp_init(void) { return 0; } - static inline void mlx5_ib_odp_cleanup(void) {} --static inline void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) {} -+static inline int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev) -+{ -+ return 0; -+} - static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, - struct mlx5_ib_mr *mr, int flags) {} - -diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c -index b3d83920d3cfb..1060b30a837a0 100644 ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -292,11 +292,13 @@ static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) - set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); - MLX5_SET(mkc, mkc, free, 1); - MLX5_SET(mkc, mkc, umr_en, 1); -- MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); -- MLX5_SET(mkc, mkc, access_mode_4_2, 
(ent->access_mode >> 2) & 0x7); -+ MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3); -+ MLX5_SET(mkc, mkc, access_mode_4_2, -+ (ent->rb_key.access_mode >> 2) & 0x7); - - MLX5_SET(mkc, mkc, translations_octword_size, -- get_mkc_octo_size(ent->access_mode, ent->ndescs)); -+ get_mkc_octo_size(ent->rb_key.access_mode, -+ ent->rb_key.ndescs)); - MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); - } - -@@ -594,8 +596,8 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) - if (err != -EAGAIN) { - mlx5_ib_warn( - dev, -- "command failed order %d, err %d\n", -- ent->order, err); -+ "add keys command failed, err %d\n", -+ err); - queue_delayed_work(cache->wq, &ent->dwork, - msecs_to_jiffies(1000)); - } -@@ -641,22 +643,49 @@ static void delayed_cache_work_func(struct work_struct *work) - __cache_work_func(ent); - } - -+static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1, -+ struct mlx5r_cache_rb_key key2) -+{ -+ int res; -+ -+ res = key1.ats - key2.ats; -+ if (res) -+ return res; -+ -+ res = key1.access_mode - key2.access_mode; -+ if (res) -+ return res; -+ -+ res = key1.access_flags - key2.access_flags; -+ if (res) -+ return res; -+ -+ /* -+ * keep ndescs the last in the compare table since the find function -+ * searches for an exact match on all properties and only closest -+ * match in size. -+ */ -+ return key1.ndescs - key2.ndescs; -+} -+ - static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache, - struct mlx5_cache_ent *ent) - { - struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL; - struct mlx5_cache_ent *cur; -+ int cmp; - - mutex_lock(&cache->rb_lock); - /* Figure out where to put new node */ - while (*new) { - cur = rb_entry(*new, struct mlx5_cache_ent, node); - parent = *new; -- if (ent->order < cur->order) -+ cmp = cache_ent_key_cmp(cur->rb_key, ent->rb_key); -+ if (cmp > 0) - new = &((*new)->rb_left); -- if (ent->order > cur->order) -+ if (cmp < 0) - new = &((*new)->rb_right); -- if (ent->order == cur->order) { -+ if (cmp == 0) { - mutex_unlock(&cache->rb_lock); - return -EEXIST; - } -@@ -670,40 +699,45 @@ static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache, - return 0; - } - --static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev, -- unsigned int order) -+static struct mlx5_cache_ent * -+mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev, -+ struct mlx5r_cache_rb_key rb_key) - { - struct rb_node *node = dev->cache.rb_root.rb_node; - struct mlx5_cache_ent *cur, *smallest = NULL; -+ int cmp; - - /* - * Find the smallest ent with order >= requested_order. - */ - while (node) { - cur = rb_entry(node, struct mlx5_cache_ent, node); -- if (cur->order > order) { -+ cmp = cache_ent_key_cmp(cur->rb_key, rb_key); -+ if (cmp > 0) { - smallest = cur; - node = node->rb_left; - } -- if (cur->order < order) -+ if (cmp < 0) - node = node->rb_right; -- if (cur->order == order) -+ if (cmp == 0) - return cur; - } - -- return smallest; -+ return (smallest && -+ smallest->rb_key.access_mode == rb_key.access_mode && -+ smallest->rb_key.access_flags == rb_key.access_flags && -+ smallest->rb_key.ats == rb_key.ats) ? 
-+ smallest : -+ NULL; - } - --struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, -- struct mlx5_cache_ent *ent, -- int access_flags) -+static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, -+ struct mlx5_cache_ent *ent, -+ int access_flags) - { - struct mlx5_ib_mr *mr; - int err; - -- if (!mlx5r_umr_can_reconfig(dev, 0, access_flags)) -- return ERR_PTR(-EOPNOTSUPP); -- - mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) - return ERR_PTR(-ENOMEM); -@@ -734,12 +768,44 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - return mr; - } - --struct mlx5_ib_mr *mlx5_mr_cache_alloc_order(struct mlx5_ib_dev *dev, -- u32 order, int access_flags) -+static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev, -+ int access_flags) -+{ -+ int ret = 0; -+ -+ if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) && -+ MLX5_CAP_GEN(dev->mdev, atomic) && -+ MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) -+ ret |= IB_ACCESS_REMOTE_ATOMIC; -+ -+ if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && -+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && -+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) -+ ret |= IB_ACCESS_RELAXED_ORDERING; -+ -+ if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && -+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && -+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) -+ ret |= IB_ACCESS_RELAXED_ORDERING; -+ -+ return ret; -+} -+ -+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, -+ int access_flags, int access_mode, -+ int ndescs) - { -- struct mlx5_cache_ent *ent = mkey_cache_ent_from_order(dev, order); -+ struct mlx5r_cache_rb_key rb_key = { -+ .ndescs = ndescs, -+ .access_mode = access_mode, -+ .access_flags = get_unchangeable_access_flags(dev, access_flags) -+ }; -+ struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key); - -- return mlx5_mr_cache_alloc(dev, ent, access_flags); -+ if (!ent) -+ return ERR_PTR(-EOPNOTSUPP); -+ -+ return _mlx5_mr_cache_alloc(dev, ent, access_flags); - } - - static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) -@@ -766,28 +832,32 @@ static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) - dev->cache.fs_root = NULL; - } - -+static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev, -+ struct mlx5_cache_ent *ent) -+{ -+ int order = order_base_2(ent->rb_key.ndescs); -+ struct dentry *dir; -+ -+ if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) -+ order = MLX5_IMR_KSM_CACHE_ENTRY + 2; -+ -+ sprintf(ent->name, "%d", order); -+ dir = debugfs_create_dir(ent->name, dev->cache.fs_root); -+ debugfs_create_file("size", 0600, dir, ent, &size_fops); -+ debugfs_create_file("limit", 0600, dir, ent, &limit_fops); -+ debugfs_create_ulong("cur", 0400, dir, &ent->stored); -+ debugfs_create_u32("miss", 0600, dir, &ent->miss); -+} -+ - static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev) - { -+ struct dentry *dbg_root = mlx5_debugfs_get_dev_root(dev->mdev); - struct mlx5_mkey_cache *cache = &dev->cache; -- struct mlx5_cache_ent *ent; -- struct dentry *dir; -- int i; - - if (!mlx5_debugfs_root || dev->is_rep) - return; - -- dir = mlx5_debugfs_get_dev_root(dev->mdev); -- cache->fs_root = debugfs_create_dir("mr_cache", dir); -- -- for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) { -- ent = mkey_cache_ent_from_order(dev, i); -- sprintf(ent->name, "%d", ent->order); -- dir = debugfs_create_dir(ent->name, cache->fs_root); -- debugfs_create_file("size", 0600, dir, ent, &size_fops); -- debugfs_create_file("limit", 0600, dir, 
ent, &limit_fops); -- debugfs_create_ulong("cur", 0400, dir, &ent->stored); -- debugfs_create_u32("miss", 0600, dir, &ent->miss); -- } -+ cache->fs_root = debugfs_create_dir("mr_cache", dbg_root); - } - - static void delay_time_func(struct timer_list *t) -@@ -798,9 +868,11 @@ static void delay_time_func(struct timer_list *t) - } - - struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev, -- int order) -+ struct mlx5r_cache_rb_key rb_key, -+ bool persistent_entry) - { - struct mlx5_cache_ent *ent; -+ int order; - int ret; - - ent = kzalloc(sizeof(*ent), GFP_KERNEL); -@@ -808,7 +880,7 @@ struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev, - return ERR_PTR(-ENOMEM); - - xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ); -- ent->order = order; -+ ent->rb_key = rb_key; - ent->dev = dev; - - INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); -@@ -818,13 +890,36 @@ struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev, - kfree(ent); - return ERR_PTR(ret); - } -+ -+ if (persistent_entry) { -+ if (rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) -+ order = MLX5_IMR_KSM_CACHE_ENTRY; -+ else -+ order = order_base_2(rb_key.ndescs) - 2; -+ -+ if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && -+ !dev->is_rep && mlx5_core_is_pf(dev->mdev) && -+ mlx5r_umr_can_load_pas(dev, 0)) -+ ent->limit = dev->mdev->profile.mr_cache[order].limit; -+ else -+ ent->limit = 0; -+ -+ mlx5_mkey_cache_debugfs_add_ent(dev, ent); -+ } -+ - return ent; - } - - int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) - { - struct mlx5_mkey_cache *cache = &dev->cache; -+ struct rb_root *root = &dev->cache.rb_root; -+ struct mlx5r_cache_rb_key rb_key = { -+ .access_mode = MLX5_MKC_ACCESS_MODE_MTT, -+ }; - struct mlx5_cache_ent *ent; -+ struct rb_node *node; -+ int ret; - int i; - - mutex_init(&dev->slow_path_mutex); -@@ -838,33 +933,32 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) - - mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); - timer_setup(&dev->delay_timer, delay_time_func, 0); -- for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) { -- ent = mlx5r_cache_create_ent(dev, i); -- -- if (i > MKEY_CACHE_LAST_STD_ENTRY) { -- mlx5_odp_init_mkey_cache_entry(ent); -- continue; -+ mlx5_mkey_cache_debugfs_init(dev); -+ for (i = 0; i <= mkey_cache_max_order(dev); i++) { -+ rb_key.ndescs = 1 << (i + 2); -+ ent = mlx5r_cache_create_ent(dev, rb_key, true); -+ if (IS_ERR(ent)) { -+ ret = PTR_ERR(ent); -+ goto err; - } -+ } - -- if (ent->order > mkey_cache_max_order(dev)) -- continue; -+ ret = mlx5_odp_init_mkey_cache(dev); -+ if (ret) -+ goto err; - -- ent->ndescs = 1 << ent->order; -- ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; -- if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && -- !dev->is_rep && mlx5_core_is_pf(dev->mdev) && -- mlx5r_umr_can_load_pas(dev, 0)) -- ent->limit = dev->mdev->profile.mr_cache[i].limit; -- else -- ent->limit = 0; -+ for (node = rb_first(root); node; node = rb_next(node)) { -+ ent = rb_entry(node, struct mlx5_cache_ent, node); - xa_lock_irq(&ent->mkeys); - queue_adjust_cache_locked(ent); - xa_unlock_irq(&ent->mkeys); - } - -- mlx5_mkey_cache_debugfs_init(dev); -- - return 0; -+ -+err: -+ mlx5_ib_warn(dev, "failed to create mkey cache entry\n"); -+ return ret; - } - - int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) -@@ -965,7 +1059,7 @@ static int get_octo_len(u64 addr, u64 len, int page_shift) - static int mkey_cache_max_order(struct mlx5_ib_dev *dev) - { - if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) -- return 
MKEY_CACHE_LAST_STD_ENTRY + 2; -+ return MKEY_CACHE_LAST_STD_ENTRY; - return MLX5_MAX_UMR_SHIFT; - } - -@@ -995,6 +1089,9 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, - struct ib_umem *umem, u64 iova, - int access_flags) - { -+ struct mlx5r_cache_rb_key rb_key = { -+ .access_mode = MLX5_MKC_ACCESS_MODE_MTT, -+ }; - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_cache_ent *ent; - struct mlx5_ib_mr *mr; -@@ -1007,8 +1104,11 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, - 0, iova); - if (WARN_ON(!page_size)) - return ERR_PTR(-EINVAL); -- ent = mkey_cache_ent_from_order( -- dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size))); -+ -+ rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size); -+ rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags); -+ rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags); -+ ent = mkey_cache_ent_from_rb_key(dev, rb_key); - /* - * Matches access in alloc_cache_mr(). If the MR can't come from the - * cache then synchronously create an uncached one. -@@ -1022,7 +1122,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, - return mr; - } - -- mr = mlx5_mr_cache_alloc(dev, ent, access_flags); -+ mr = _mlx5_mr_cache_alloc(dev, ent, access_flags); - if (IS_ERR(mr)) - return mr; - -@@ -1452,7 +1552,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, - mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova); - if (WARN_ON(!*page_size)) - return false; -- return (1ULL << mr->mmkey.cache_ent->order) >= -+ return (mr->mmkey.cache_ent->rb_key.ndescs) >= - ib_umem_num_dma_blocks(new_umem, *page_size); - } - -diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c -index 7f68940ca0d1e..96d4faabbff8a 100644 ---- a/drivers/infiniband/hw/mlx5/odp.c -+++ b/drivers/infiniband/hw/mlx5/odp.c -@@ -406,7 +406,6 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, - static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, - unsigned long idx) - { -- int order = order_base_2(MLX5_IMR_MTT_ENTRIES); - struct mlx5_ib_dev *dev = mr_to_mdev(imr); - struct ib_umem_odp *odp; - struct mlx5_ib_mr *mr; -@@ -419,8 +418,9 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, - if (IS_ERR(odp)) - return ERR_CAST(odp); - -- BUILD_BUG_ON(order > MKEY_CACHE_LAST_STD_ENTRY); -- mr = mlx5_mr_cache_alloc_order(dev, order, imr->access_flags); -+ mr = mlx5_mr_cache_alloc(dev, imr->access_flags, -+ MLX5_MKC_ACCESS_MODE_MTT, -+ MLX5_IMR_MTT_ENTRIES); - if (IS_ERR(mr)) { - ib_umem_odp_release(odp); - return mr; -@@ -494,8 +494,8 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, - if (IS_ERR(umem_odp)) - return ERR_CAST(umem_odp); - -- imr = mlx5_mr_cache_alloc_order(dev, MLX5_IMR_KSM_CACHE_ENTRY, -- access_flags); -+ imr = mlx5_mr_cache_alloc(dev, access_flags, MLX5_MKC_ACCESS_MODE_KSM, -+ mlx5_imr_ksm_entries); - if (IS_ERR(imr)) { - ib_umem_odp_release(umem_odp); - return imr; -@@ -1591,12 +1591,22 @@ mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) - return err; - } - --void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) -+int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev) - { -- if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) -- return; -- ent->ndescs = mlx5_imr_ksm_entries; -- ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM; -+ struct mlx5r_cache_rb_key rb_key = { -+ .access_mode = MLX5_MKC_ACCESS_MODE_KSM, -+ .ndescs = mlx5_imr_ksm_entries, -+ }; 
-+ struct mlx5_cache_ent *ent; -+ -+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) -+ return 0; -+ -+ ent = mlx5r_cache_create_ent(dev, rb_key, true); -+ if (IS_ERR(ent)) -+ return PTR_ERR(ent); -+ -+ return 0; - } - - static const struct ib_device_ops mlx5_ib_dev_odp_ops = { --- -2.39.5 - diff --git a/queue-6.1/rdma-mlx5-limit-usage-of-over-sized-mkeys-from-the-mr-cache.patch b/queue-6.1/rdma-mlx5-limit-usage-of-over-sized-mkeys-from-the-mr-cache.patch deleted file mode 100644 index c68b5d0f1c..0000000000 --- a/queue-6.1/rdma-mlx5-limit-usage-of-over-sized-mkeys-from-the-mr-cache.patch +++ /dev/null @@ -1,92 +0,0 @@ -From ee6d57a2e13d11ce9050cfc3e3b69ef707a44a63 Mon Sep 17 00:00:00 2001 -From: Michael Guralnik -Date: Tue, 3 Sep 2024 14:24:49 +0300 -Subject: RDMA/mlx5: Limit usage of over-sized mkeys from the MR cache - -From: Michael Guralnik - -commit ee6d57a2e13d11ce9050cfc3e3b69ef707a44a63 upstream. - -When searching the MR cache for suitable cache entries, don't use mkeys -larger than twice the size required for the MR. -This should ensure the usage of mkeys closer to the minimal required size -and reduce memory waste. - -On driver init we create entries for mkeys with clear attributes and -powers of 2 sizes from 4 to the max supported size. -This solves the issue for anyone using mkeys that fit these -requirements. - -In the use case where an MR is registered with different attributes, -like an access flag we can't UMR, we'll create a new cache entry to store -it upon dereg. -Without this fix, any later registration with same attributes and smaller -size will use the newly created cache entry and it's mkeys, disregarding -the memory waste of using mkeys larger than required. - -For example, one worst-case scenario can be when registering and -deregistering a 1GB mkey with ATS enabled which will cause the creation of -a new cache entry to hold those type of mkeys. A user registering a 4k MR -with ATS will end up using the new cache entry and an mkey that can -support a 1GB MR, thus wasting x250k memory than actually needed in the HW. - -Additionally, allow all small registration to use the smallest size -cache entry that is initialized on driver load even if size is larger -than twice the required size. - -Fixes: 73d09b2fe833 ("RDMA/mlx5: Introduce mlx5r_cache_rb_key") -Signed-off-by: Michael Guralnik -Link: https://patch.msgid.link/8ba3a6e3748aace2026de8b83da03aba084f78f4.1725362530.git.leon@kernel.org -Signed-off-by: Leon Romanovsky -Signed-off-by: Greg Kroah-Hartman ---- - drivers/infiniband/hw/mlx5/mr.c | 14 ++++++++++++-- - 1 file changed, 12 insertions(+), 2 deletions(-) - ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -48,6 +48,7 @@ enum { - MAX_PENDING_REG_MR = 8, - }; - -+#define MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS 4 - #define MLX5_UMR_ALIGN 2048 - - static void -@@ -656,6 +657,7 @@ mkey_cache_ent_from_rb_key(struct mlx5_i - { - struct rb_node *node = dev->cache.rb_root.rb_node; - struct mlx5_cache_ent *cur, *smallest = NULL; -+ u64 ndescs_limit; - int cmp; - - /* -@@ -674,10 +676,18 @@ mkey_cache_ent_from_rb_key(struct mlx5_i - return cur; - } - -+ /* -+ * Limit the usage of mkeys larger than twice the required size while -+ * also allowing the usage of smallest cache entry for small MRs. 
-+ */ -+ ndescs_limit = max_t(u64, rb_key.ndescs * 2, -+ MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS); -+ - return (smallest && - smallest->rb_key.access_mode == rb_key.access_mode && - smallest->rb_key.access_flags == rb_key.access_flags && -- smallest->rb_key.ats == rb_key.ats) ? -+ smallest->rb_key.ats == rb_key.ats && -+ smallest->rb_key.ndescs <= ndescs_limit) ? - smallest : - NULL; - } -@@ -958,7 +968,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_ - mlx5_mkey_cache_debugfs_init(dev); - mutex_lock(&cache->rb_lock); - for (i = 0; i <= mkey_cache_max_order(dev); i++) { -- rb_key.ndescs = 1 << (i + 2); -+ rb_key.ndescs = MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS << i; - ent = mlx5r_cache_create_ent_locked(dev, rb_key, true); - if (IS_ERR(ent)) { - ret = PTR_ERR(ent); diff --git a/queue-6.1/rdma-mlx5-reduce-qp-table-exposure.patch b/queue-6.1/rdma-mlx5-reduce-qp-table-exposure.patch deleted file mode 100644 index 8abe91b017..0000000000 --- a/queue-6.1/rdma-mlx5-reduce-qp-table-exposure.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 31e1b4f44049773843852197aab66262fea5d3ca Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 5 Jun 2023 13:14:05 +0300 -Subject: RDMA/mlx5: Reduce QP table exposure - -From: Leon Romanovsky - -[ Upstream commit 2ecfd946169e7f56534db2a5f6935858be3005ba ] - -driver.h is common header to whole mlx5 code base, but struct -mlx5_qp_table is used in mlx5_ib driver only. So move that struct -to be under sole responsibility of mlx5_ib. - -Link: https://lore.kernel.org/r/bec0dc1158e795813b135d1143147977f26bf668.1685953497.git.leon@kernel.org -Signed-off-by: Leon Romanovsky -Stable-dep-of: c534ffda781f ("RDMA/mlx5: Fix AH static rate parsing") -Signed-off-by: Sasha Levin ---- - drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + - drivers/infiniband/hw/mlx5/qp.h | 11 ++++++++++- - include/linux/mlx5/driver.h | 9 --------- - 3 files changed, 11 insertions(+), 10 deletions(-) - -diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h -index 024d2071c6a5d..5c533023a51a4 100644 ---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h -+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h -@@ -25,6 +25,7 @@ - #include - - #include "srq.h" -+#include "qp.h" - - #define mlx5_ib_dbg(_dev, format, arg...) 
\ - dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ -diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h -index fb2f4e030bb8f..e677fa0ca4226 100644 ---- a/drivers/infiniband/hw/mlx5/qp.h -+++ b/drivers/infiniband/hw/mlx5/qp.h -@@ -6,7 +6,16 @@ - #ifndef _MLX5_IB_QP_H - #define _MLX5_IB_QP_H - --#include "mlx5_ib.h" -+struct mlx5_ib_dev; -+ -+struct mlx5_qp_table { -+ struct notifier_block nb; -+ -+ /* protect radix tree -+ */ -+ spinlock_t lock; -+ struct radix_tree_root tree; -+}; - - int mlx5_init_qp_table(struct mlx5_ib_dev *dev); - void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev); -diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h -index 6cea62ca76d6b..060610183fdf9 100644 ---- a/include/linux/mlx5/driver.h -+++ b/include/linux/mlx5/driver.h -@@ -440,15 +440,6 @@ struct mlx5_core_health { - struct delayed_work update_fw_log_ts_work; - }; - --struct mlx5_qp_table { -- struct notifier_block nb; -- -- /* protect radix tree -- */ -- spinlock_t lock; -- struct radix_tree_root tree; --}; -- - enum { - MLX5_PF_NOTIFY_DISABLE_VF, - MLX5_PF_NOTIFY_ENABLE_VF, --- -2.39.5 - diff --git a/queue-6.1/rdma-mlx5-remove-extra-unlock-on-error-path.patch b/queue-6.1/rdma-mlx5-remove-extra-unlock-on-error-path.patch deleted file mode 100644 index 4961a41598..0000000000 --- a/queue-6.1/rdma-mlx5-remove-extra-unlock-on-error-path.patch +++ /dev/null @@ -1,38 +0,0 @@ -From c1eb2512596fb3542357bb6c34c286f5e0374538 Mon Sep 17 00:00:00 2001 -From: Jason Gunthorpe -Date: Tue, 28 May 2024 15:52:52 +0300 -Subject: RDMA/mlx5: Remove extra unlock on error path - -From: Jason Gunthorpe - -commit c1eb2512596fb3542357bb6c34c286f5e0374538 upstream. - -The below commit lifted the locking out of this function but left this -error path unlock behind resulting in unbalanced locking. Remove the -missed unlock too. - -Cc: stable@vger.kernel.org -Fixes: 627122280c87 ("RDMA/mlx5: Add work to remove temporary entries from the cache") -Signed-off-by: Jason Gunthorpe -Reviewed-by: Michael Guralnik -Link: https://lore.kernel.org/r/78090c210c750f47219b95248f9f782f34548bb1.1716900410.git.leon@kernel.org -Signed-off-by: Leon Romanovsky -Signed-off-by: Greg Kroah-Hartman ---- - drivers/infiniband/hw/mlx5/mr.c | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -638,10 +638,8 @@ static int mlx5_cache_ent_insert(struct - new = &((*new)->rb_left); - if (cmp < 0) - new = &((*new)->rb_right); -- if (cmp == 0) { -- mutex_unlock(&cache->rb_lock); -+ if (cmp == 0) - return -EEXIST; -- } - } - - /* Add new node and rebalance tree. */ diff --git a/queue-6.1/rdma-mlx5-remove-implicit-odp-cache-entry.patch b/queue-6.1/rdma-mlx5-remove-implicit-odp-cache-entry.patch deleted file mode 100644 index 9ee1e9efa3..0000000000 --- a/queue-6.1/rdma-mlx5-remove-implicit-odp-cache-entry.patch +++ /dev/null @@ -1,83 +0,0 @@ -From f1cf3c129548533fa9dc9569a22ff1ed3e3c9e02 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 26 Jan 2023 00:28:03 +0200 -Subject: RDMA/mlx5: Remove implicit ODP cache entry - -From: Aharon Landau - -[ Upstream commit 18b1746bddf5e7f6b2618966596d9517172a5cd7 ] - -Implicit ODP mkey doesn't have unique properties. It shares the same -properties as the order 18 cache entry. There is no need to devote a -special entry for that. 
- -Link: https://lore.kernel.org/r/20230125222807.6921-3-michaelgur@nvidia.com -Signed-off-by: Aharon Landau -Signed-off-by: Jason Gunthorpe -Stable-dep-of: d97505baea64 ("RDMA/mlx5: Fix the recovery flow of the UMR QP") -Signed-off-by: Sasha Levin ---- - drivers/infiniband/hw/mlx5/odp.c | 20 +++++--------------- - include/linux/mlx5/driver.h | 1 - - 2 files changed, 5 insertions(+), 16 deletions(-) - -diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c -index a5c9baec8be85..5f0a17382de73 100644 ---- a/drivers/infiniband/hw/mlx5/odp.c -+++ b/drivers/infiniband/hw/mlx5/odp.c -@@ -406,6 +406,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, - static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, - unsigned long idx) - { -+ int order = order_base_2(MLX5_IMR_MTT_ENTRIES); - struct mlx5_ib_dev *dev = mr_to_mdev(imr); - struct ib_umem_odp *odp; - struct mlx5_ib_mr *mr; -@@ -418,7 +419,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, - if (IS_ERR(odp)) - return ERR_CAST(odp); - -- mr = mlx5_mr_cache_alloc(dev, &dev->cache.ent[MLX5_IMR_MTT_CACHE_ENTRY], -+ BUILD_BUG_ON(order > MKEY_CACHE_LAST_STD_ENTRY); -+ mr = mlx5_mr_cache_alloc(dev, &dev->cache.ent[order], - imr->access_flags); - if (IS_ERR(mr)) { - ib_umem_odp_release(odp); -@@ -1595,20 +1597,8 @@ void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) - { - if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) - return; -- -- switch (ent->order - 2) { -- case MLX5_IMR_MTT_CACHE_ENTRY: -- ent->ndescs = MLX5_IMR_MTT_ENTRIES; -- ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; -- ent->limit = 0; -- break; -- -- case MLX5_IMR_KSM_CACHE_ENTRY: -- ent->ndescs = mlx5_imr_ksm_entries; -- ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM; -- ent->limit = 0; -- break; -- } -+ ent->ndescs = mlx5_imr_ksm_entries; -+ ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM; - } - - static const struct ib_device_ops mlx5_ib_dev_odp_ops = { -diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h -index 3c3e0f26c2446..6cea62ca76d6b 100644 ---- a/include/linux/mlx5/driver.h -+++ b/include/linux/mlx5/driver.h -@@ -744,7 +744,6 @@ enum { - - enum { - MKEY_CACHE_LAST_STD_ENTRY = 20, -- MLX5_IMR_MTT_CACHE_ENTRY, - MLX5_IMR_KSM_CACHE_ENTRY, - MAX_MKEY_CACHE_ENTRIES - }; --- -2.39.5 - diff --git a/queue-6.1/rdma-mlx5-remove-not-used-cache-disable-flag.patch b/queue-6.1/rdma-mlx5-remove-not-used-cache-disable-flag.patch deleted file mode 100644 index 0a384b91e9..0000000000 --- a/queue-6.1/rdma-mlx5-remove-not-used-cache-disable-flag.patch +++ /dev/null @@ -1,53 +0,0 @@ -From c99a7457e5bb873914a74307ba2df85f6799203b Mon Sep 17 00:00:00 2001 -From: Leon Romanovsky -Date: Thu, 28 Sep 2023 20:20:47 +0300 -Subject: RDMA/mlx5: Remove not-used cache disable flag - -From: Leon Romanovsky - -commit c99a7457e5bb873914a74307ba2df85f6799203b upstream. - -During execution of mlx5_mkey_cache_cleanup(), there is a guarantee -that MR are not registered and/or destroyed. It means that we don't -need newly introduced cache disable flag. 
- -Fixes: 374012b00457 ("RDMA/mlx5: Fix mkey cache possible deadlock on cleanup") -Link: https://lore.kernel.org/r/c7e9c9f98c8ae4a7413d97d9349b29f5b0a23dbe.1695921626.git.leon@kernel.org -Signed-off-by: Leon Romanovsky -Signed-off-by: Greg Kroah-Hartman ---- - drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - - drivers/infiniband/hw/mlx5/mr.c | 5 ----- - 2 files changed, 6 deletions(-) - ---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h -+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h -@@ -822,7 +822,6 @@ struct mlx5_mkey_cache { - struct dentry *fs_root; - unsigned long last_add; - struct delayed_work remove_ent_dwork; -- u8 disable: 1; - }; - - struct mlx5_ib_port_resources { ---- a/drivers/infiniband/hw/mlx5/mr.c -+++ b/drivers/infiniband/hw/mlx5/mr.c -@@ -1007,7 +1007,6 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ - return 0; - - mutex_lock(&dev->cache.rb_lock); -- dev->cache.disable = true; - for (node = rb_first(root); node; node = rb_next(node)) { - ent = rb_entry(node, struct mlx5_cache_ent, node); - spin_lock_irq(&ent->mkeys_queue.lock); -@@ -1810,10 +1809,6 @@ static int cache_ent_find_and_store(stru - } - - mutex_lock(&cache->rb_lock); -- if (cache->disable) { -- mutex_unlock(&cache->rb_lock); -- return 0; -- } - ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key); - if (ent) { - if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) { diff --git a/queue-6.1/rdma-mlx5-uncacheable-mkey-has-neither-rb_key-or-cache_ent.patch b/queue-6.1/rdma-mlx5-uncacheable-mkey-has-neither-rb_key-or-cache_ent.patch deleted file mode 100644 index 4778718718..0000000000 --- a/queue-6.1/rdma-mlx5-uncacheable-mkey-has-neither-rb_key-or-cache_ent.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 0611a8e8b475fc5230b9a24d29c8397aaab20b63 Mon Sep 17 00:00:00 2001 -From: Or Har-Toov -Date: Wed, 3 Apr 2024 13:35:59 +0300 -Subject: RDMA/mlx5: Uncacheable mkey has neither rb_key or cache_ent - -From: Or Har-Toov - -commit 0611a8e8b475fc5230b9a24d29c8397aaab20b63 upstream. - -As some mkeys can't be modified with UMR due to some UMR limitations, -like the size of translation that can be updated, not all user mkeys can -be cached. - -Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow") -Signed-off-by: Or Har-Toov -Link: https://lore.kernel.org/r/f2742dd934ed73b2d32c66afb8e91b823063880c.1712140377.git.leon@kernel.org -Signed-off-by: Leon Romanovsky -Signed-off-by: Greg Kroah-Hartman ---- - drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/infiniband/hw/mlx5/mlx5_ib.h -+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h -@@ -651,7 +651,7 @@ struct mlx5_ib_mkey { - unsigned int ndescs; - struct wait_queue_head wait; - refcount_t usecount; -- /* User Mkey must hold either a rb_key or a cache_ent. */ -+ /* Cacheable user Mkey must hold either a rb_key or a cache_ent. 
*/ - struct mlx5r_cache_rb_key rb_key; - struct mlx5_cache_ent *cache_ent; - }; diff --git a/queue-6.1/series b/queue-6.1/series index 3dfeae4dff..f91c976f30 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -91,22 +91,10 @@ media-mediatek-vcodec-fix-h264-multi-stateless-decoder-smatch-warning.patch spi-atmel-quadspi-avoid-overwriting-delay-register-settings.patch spi-atmel-quadspi-fix-wrong-register-value-written-to-mr.patch netfilter-allow-exp-not-to-be-removed-in-nf_ct_find_expectation.patch -rdma-mlx5-don-t-keep-umrable-page_shift-in-cache-ent.patch -rdma-mlx5-remove-implicit-odp-cache-entry.patch -rdma-mlx5-change-the-cache-structure-to-an-rb-tree.patch -rdma-mlx5-introduce-mlx5r_cache_rb_key.patch -rdma-mlx5-cache-all-user-cacheable-mkeys-on-dereg-mr.patch -rdma-mlx5-add-work-to-remove-temporary-entries-from-.patch -rdma-mlx5-implement-mkeys-management-via-lifo-queue.patch -rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch ib-mlx5-set-and-get-correct-qp_num-for-a-dct-qp.patch ovl-fix-uaf-in-ovl_dentry_update_reval-by-moving-dpu.patch sunrpc-convert-rpc_task_-constants-to-enum.patch sunrpc-prevent-looping-due-to-rpc_signal_task-races.patch -rdma-mlx-calling-qp-event-handler-in-workqueue-conte.patch -rdma-mlx5-reduce-qp-table-exposure.patch -ib-core-add-support-for-xdr-link-speed.patch -rdma-mlx5-fix-ah-static-rate-parsing.patch scsi-core-clear-driver-private-data-when-retrying-re.patch rdma-mlx5-fix-bind-qp-error-cleanup-flow.patch sunrpc-suppress-warnings-for-unused-procfs-functions.patch @@ -171,17 +159,3 @@ mm-memory-use-exception-ip-to-search-exception-tables.patch squashfs-check-the-inode-number-is-not-the-invalid-value-of-zero.patch pfifo_tail_enqueue-drop-new-packet-when-sch-limit-0.patch media-mtk-vcodec-potential-null-pointer-deference-in-scp.patch -rdma-mlx5-fix-mr-cache-debugfs-error-in-ib-representors-mode.patch -rdma-mlx5-check-reg_create-create-for-errors.patch -rdma-mlx5-fix-mkey-cache-possible-deadlock-on-cleanup.patch -rdma-mlx5-fix-assigning-access-flags-to-cache-mkeys.patch -rdma-mlx5-uncacheable-mkey-has-neither-rb_key-or-cache_ent.patch -rdma-mlx5-change-check-for-cacheable-mkeys.patch -rdma-mlx5-remove-extra-unlock-on-error-path.patch -rdma-mlx5-follow-rb_key.ats-when-creating-new-mkeys.patch -rdma-mlx5-ensure-created-mkeys-always-have-a-populated-rb_key.patch -rdma-mlx5-fix-counter-update-on-mr-cache-mkey-creation.patch -rdma-mlx5-limit-usage-of-over-sized-mkeys-from-the-mr-cache.patch -rdma-mlx5-remove-not-used-cache-disable-flag.patch -rdma-mlx5-fix-mkey-cache-wq-flush.patch -rdma-mlx5-fix-mr-cache-temp-entries-cleanup.patch