+++ /dev/null
-From 15dee1c356175330d3f60d2cf3ab62b39be88ab7 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Wed, 27 Sep 2023 14:12:40 +0800
-Subject: md: factor out a helper from mddev_put()
-
-From: Yu Kuai <yukuai3@huawei.com>
-
-[ Upstream commit 3d8d32873c7b6d9cec5b40c2ddb8c7c55961694f ]
-
-There are no functional changes, prepare to simplify md_seq_ops in next
-patch.
-
-Signed-off-by: Yu Kuai <yukuai3@huawei.com>
-Signed-off-by: Song Liu <song@kernel.org>
-Link: https://lore.kernel.org/r/20230927061241.1552837-2-yukuai1@huaweicloud.com
-Stable-dep-of: 8d28d0ddb986 ("md/md-bitmap: Synchronize bitmap_get_stats() with bitmap lifetime")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/md/md.c | 29 +++++++++++++++++------------
- 1 file changed, 17 insertions(+), 12 deletions(-)
-
-diff --git a/drivers/md/md.c b/drivers/md/md.c
-index 4b629b7a540f7..44bac1e7d47e2 100644
---- a/drivers/md/md.c
-+++ b/drivers/md/md.c
-@@ -667,23 +667,28 @@ static inline struct mddev *mddev_get(struct mddev *mddev)
-
- static void mddev_delayed_delete(struct work_struct *ws);
-
-+static void __mddev_put(struct mddev *mddev)
-+{
-+ if (mddev->raid_disks || !list_empty(&mddev->disks) ||
-+ mddev->ctime || mddev->hold_active)
-+ return;
-+
-+ /* Array is not configured at all, and not held active, so destroy it */
-+ set_bit(MD_DELETED, &mddev->flags);
-+
-+ /*
-+ * Call queue_work inside the spinlock so that flush_workqueue() after
-+ * mddev_find will succeed in waiting for the work to be done.
-+ */
-+ queue_work(md_misc_wq, &mddev->del_work);
-+}
-+
- void mddev_put(struct mddev *mddev)
- {
- if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
- return;
-- if (!mddev->raid_disks && list_empty(&mddev->disks) &&
-- mddev->ctime == 0 && !mddev->hold_active) {
-- /* Array is not configured at all, and not held active,
-- * so destroy it */
-- set_bit(MD_DELETED, &mddev->flags);
-
-- /*
-- * Call queue_work inside the spinlock so that
-- * flush_workqueue() after mddev_find will succeed in waiting
-- * for the work to be done.
-- */
-- queue_work(md_misc_wq, &mddev->del_work);
-- }
-+ __mddev_put(mddev);
- spin_unlock(&all_mddevs_lock);
- }
-
---
-2.39.5
-
Stable-dep-of: 8d28d0ddb986 ("md/md-bitmap: Synchronize bitmap_get_stats() with bitmap lifetime")
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
- drivers/md/md-bitmap.c | 25 ++++++-------------------
- drivers/md/md-bitmap.h | 8 +++++++-
- drivers/md/md.c | 29 ++++++++++++++++++++++++++++-
+ drivers/md/md-bitmap.c | 25 ++++++-------------------
+ drivers/md/md-bitmap.h | 8 +++++++-
+ drivers/md/md.c | 29 ++++++++++++++++++++++++++++-
3 files changed, 41 insertions(+), 21 deletions(-)
-diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
-index 9d8ac04c23462..736268447d3e1 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
-@@ -2022,32 +2022,19 @@ int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
+@@ -2022,32 +2022,19 @@ int md_bitmap_copy_from_slot(struct mdde
}
EXPORT_SYMBOL_GPL(md_bitmap_copy_from_slot);
}
int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
-diff --git a/drivers/md/md-bitmap.h b/drivers/md/md-bitmap.h
-index 3a4750952b3a7..00ac4c3ecf4d9 100644
--- a/drivers/md/md-bitmap.h
+++ b/drivers/md/md-bitmap.h
@@ -233,6 +233,12 @@ struct bitmap {
/* the bitmap API */
/* these are used only by md/bitmap */
-@@ -243,7 +249,7 @@ void md_bitmap_destroy(struct mddev *mddev);
+@@ -243,7 +249,7 @@ void md_bitmap_destroy(struct mddev *mdd
void md_bitmap_print_sb(struct bitmap *bitmap);
void md_bitmap_update_sb(struct bitmap *bitmap);
int md_bitmap_setallbits(struct bitmap *bitmap);
void md_bitmap_write_all(struct bitmap *bitmap);
-diff --git a/drivers/md/md.c b/drivers/md/md.c
-index 743244b06f679..887479e0d3afe 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
-@@ -8280,6 +8280,33 @@ static void md_seq_stop(struct seq_file *seq, void *v)
- spin_unlock(&all_mddevs_lock);
+@@ -8318,6 +8318,33 @@ static void md_seq_stop(struct seq_file
+ mddev_put(mddev);
}
+static void md_bitmap_status(struct seq_file *seq, struct mddev *mddev)
+
static int md_seq_show(struct seq_file *seq, void *v)
{
- struct mddev *mddev = list_entry(v, struct mddev, all_mddevs);
-@@ -8355,7 +8382,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
+ struct mddev *mddev = v;
+@@ -8406,7 +8433,7 @@ static int md_seq_show(struct seq_file *
} else
seq_printf(seq, "\n ");
seq_printf(seq, "\n");
}
---
-2.39.5
-
Signed-off-by: Song Liu <song@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
- drivers/md/md-bitmap.c | 5 ++++-
- drivers/md/md.c | 5 +++++
- 2 files changed, 9 insertions(+), 1 deletion(-)
+ drivers/md/md-bitmap.c | 5 ++++-
+ drivers/md/md.c | 4 ++++
+ 2 files changed, 8 insertions(+), 1 deletion(-)
-diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
-index bddf4f3d27a77..e18e21b24210d 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
-@@ -2029,7 +2029,10 @@ int md_bitmap_get_stats(struct bitmap *bitmap, struct md_bitmap_stats *stats)
+@@ -2029,7 +2029,10 @@ int md_bitmap_get_stats(struct bitmap *b
if (!bitmap)
return -ENOENT;
sb = kmap_local_page(bitmap->storage.sb_page);
stats->sync_size = le64_to_cpu(sb->sync_size);
kunmap_local(sb);
-diff --git a/drivers/md/md.c b/drivers/md/md.c
-index 887479e0d3afe..e2a3a1e1afca0 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
-@@ -8317,6 +8317,10 @@ static int md_seq_show(struct seq_file *seq, void *v)
+@@ -8368,6 +8368,9 @@ static int md_seq_show(struct seq_file *
return 0;
+ }
- spin_unlock(&all_mddevs_lock);
-+
+ /* prevent bitmap to be freed after checking */
+ mutex_lock(&mddev->bitmap_info.mutex);
+
spin_lock(&mddev->lock);
if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
seq_printf(seq, "%s : %sactive", mdname(mddev),
-@@ -8387,6 +8391,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
+@@ -8438,6 +8441,7 @@ static int md_seq_show(struct seq_file *
seq_printf(seq, "\n");
}
spin_unlock(&mddev->lock);
+ mutex_unlock(&mddev->bitmap_info.mutex);
- spin_lock(&all_mddevs_lock);
- if (atomic_dec_and_test(&mddev->active))
- __mddev_put(mddev);
---
-2.39.5
-
+
+ return 0;
+ }
Stable-dep-of: 8d28d0ddb986 ("md/md-bitmap: Synchronize bitmap_get_stats() with bitmap lifetime")
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
- drivers/md/md-cluster.c | 6 +++---
+ drivers/md/md-cluster.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
-diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
-index 10e0c5381d01b..a0d3f6c397707 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
-@@ -1195,7 +1195,7 @@ static int cluster_check_sync_size(struct mddev *mddev)
+@@ -1195,7 +1195,7 @@ static int cluster_check_sync_size(struc
struct dlm_lock_resource *bm_lockres;
sb = kmap_atomic(bitmap->storage.sb_page);
kunmap_atomic(sb);
for (i = 0; i < node_num; i++) {
-@@ -1227,8 +1227,8 @@ static int cluster_check_sync_size(struct mddev *mddev)
+@@ -1227,8 +1227,8 @@ static int cluster_check_sync_size(struc
sb = kmap_atomic(bitmap->storage.sb_page);
if (sync_size == 0)
kunmap_atomic(sb);
md_bitmap_free(bitmap);
return -1;
---
-2.39.5
-
+++ /dev/null
-From 967a2f6f9119288165ecd4de0f01a2d0ed4a887c Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Wed, 27 Sep 2023 14:12:41 +0800
-Subject: md: simplify md_seq_ops
-
-From: Yu Kuai <yukuai3@huawei.com>
-
-[ Upstream commit cf1b6d4441fffd0ba8ae4ced6a12f578c95ca049 ]
-
-Before this patch, the implementation is hacky and hard to understand:
-
-1) md_seq_start set pos to 1;
-2) md_seq_show found pos is 1, then print Personalities;
-3) md_seq_next found pos is 1, then it update pos to the first mddev;
-4) md_seq_show found pos is not 1 or 2, show mddev;
-5) md_seq_next found pos is not 1 or 2, update pos to next mddev;
-6) loop 4-5 until the last mddev, then md_seq_next update pos to 2;
-7) md_seq_show found pos is 2, then print unused devices;
-8) md_seq_next found pos is 2, stop;
-
-This patch remove the magic value and use seq_list_start/next/stop()
-directly, and move printing "Personalities" to md_seq_start(),
-"unsed devices" to md_seq_stop():
-
-1) md_seq_start print Personalities, and then set pos to first mddev;
-2) md_seq_show show mddev;
-3) md_seq_next update pos to next mddev;
-4) loop 2-3 until the last mddev;
-5) md_seq_stop print unsed devices;
-
-Signed-off-by: Yu Kuai <yukuai3@huawei.com>
-Signed-off-by: Song Liu <song@kernel.org>
-Link: https://lore.kernel.org/r/20230927061241.1552837-3-yukuai1@huaweicloud.com
-Stable-dep-of: 8d28d0ddb986 ("md/md-bitmap: Synchronize bitmap_get_stats() with bitmap lifetime")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/md/md.c | 100 +++++++++++-------------------------------------
- 1 file changed, 22 insertions(+), 78 deletions(-)
-
-diff --git a/drivers/md/md.c b/drivers/md/md.c
-index 44bac1e7d47e2..743244b06f679 100644
---- a/drivers/md/md.c
-+++ b/drivers/md/md.c
-@@ -8250,105 +8250,46 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
- }
-
- static void *md_seq_start(struct seq_file *seq, loff_t *pos)
-+ __acquires(&all_mddevs_lock)
- {
-- struct list_head *tmp;
-- loff_t l = *pos;
-- struct mddev *mddev;
-+ struct md_personality *pers;
-
-- if (l == 0x10000) {
-- ++*pos;
-- return (void *)2;
-- }
-- if (l > 0x10000)
-- return NULL;
-- if (!l--)
-- /* header */
-- return (void*)1;
-+ seq_puts(seq, "Personalities : ");
-+ spin_lock(&pers_lock);
-+ list_for_each_entry(pers, &pers_list, list)
-+ seq_printf(seq, "[%s] ", pers->name);
-+
-+ spin_unlock(&pers_lock);
-+ seq_puts(seq, "\n");
-+ seq->poll_event = atomic_read(&md_event_count);
-
- spin_lock(&all_mddevs_lock);
-- list_for_each(tmp,&all_mddevs)
-- if (!l--) {
-- mddev = list_entry(tmp, struct mddev, all_mddevs);
-- if (!mddev_get(mddev))
-- continue;
-- spin_unlock(&all_mddevs_lock);
-- return mddev;
-- }
-- spin_unlock(&all_mddevs_lock);
-- if (!l--)
-- return (void*)2;/* tail */
-- return NULL;
-+
-+ return seq_list_start(&all_mddevs, *pos);
- }
-
- static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
- {
-- struct list_head *tmp;
-- struct mddev *next_mddev, *mddev = v;
-- struct mddev *to_put = NULL;
--
-- ++*pos;
-- if (v == (void*)2)
-- return NULL;
--
-- spin_lock(&all_mddevs_lock);
-- if (v == (void*)1) {
-- tmp = all_mddevs.next;
-- } else {
-- to_put = mddev;
-- tmp = mddev->all_mddevs.next;
-- }
--
-- for (;;) {
-- if (tmp == &all_mddevs) {
-- next_mddev = (void*)2;
-- *pos = 0x10000;
-- break;
-- }
-- next_mddev = list_entry(tmp, struct mddev, all_mddevs);
-- if (mddev_get(next_mddev))
-- break;
-- mddev = next_mddev;
-- tmp = mddev->all_mddevs.next;
-- }
-- spin_unlock(&all_mddevs_lock);
--
-- if (to_put)
-- mddev_put(to_put);
-- return next_mddev;
--
-+ return seq_list_next(v, &all_mddevs, pos);
- }
-
- static void md_seq_stop(struct seq_file *seq, void *v)
-+ __releases(&all_mddevs_lock)
- {
-- struct mddev *mddev = v;
--
-- if (mddev && v != (void*)1 && v != (void*)2)
-- mddev_put(mddev);
-+ status_unused(seq);
-+ spin_unlock(&all_mddevs_lock);
- }
-
- static int md_seq_show(struct seq_file *seq, void *v)
- {
-- struct mddev *mddev = v;
-+ struct mddev *mddev = list_entry(v, struct mddev, all_mddevs);
- sector_t sectors;
- struct md_rdev *rdev;
-
-- if (v == (void*)1) {
-- struct md_personality *pers;
-- seq_printf(seq, "Personalities : ");
-- spin_lock(&pers_lock);
-- list_for_each_entry(pers, &pers_list, list)
-- seq_printf(seq, "[%s] ", pers->name);
--
-- spin_unlock(&pers_lock);
-- seq_printf(seq, "\n");
-- seq->poll_event = atomic_read(&md_event_count);
-+ if (!mddev_get(mddev))
- return 0;
-- }
-- if (v == (void*)2) {
-- status_unused(seq);
-- return 0;
-- }
-
-+ spin_unlock(&all_mddevs_lock);
- spin_lock(&mddev->lock);
- if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
- seq_printf(seq, "%s : %sactive", mdname(mddev),
-@@ -8419,6 +8360,9 @@ static int md_seq_show(struct seq_file *seq, void *v)
- seq_printf(seq, "\n");
- }
- spin_unlock(&mddev->lock);
-+ spin_lock(&all_mddevs_lock);
-+ if (atomic_dec_and_test(&mddev->active))
-+ __mddev_put(mddev);
-
- return 0;
- }
---
-2.39.5
-
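For context, the seq_list_start/next/stop() pattern that the removed
md_seq_ops patch describes looks roughly like the sketch below. It is a
generic illustration of the <linux/seq_file.h> helpers driven by an
external lock; the my_* names are placeholders, not the md code itself.

    #include <linux/list.h>
    #include <linux/seq_file.h>
    #include <linux/spinlock.h>

    /* Placeholder state for the sketch. */
    struct my_item {
            struct list_head node;
            char name[16];
    };

    static LIST_HEAD(my_list);
    static DEFINE_SPINLOCK(my_lock);

    static void *my_seq_start(struct seq_file *seq, loff_t *pos)
            __acquires(&my_lock)
    {
            spin_lock(&my_lock);
            /* Returns the pos'th node, or NULL when iteration is done. */
            return seq_list_start(&my_list, *pos);
    }

    static void *my_seq_next(struct seq_file *seq, void *v, loff_t *pos)
    {
            return seq_list_next(v, &my_list, pos);
    }

    static void my_seq_stop(struct seq_file *seq, void *v)
            __releases(&my_lock)
    {
            spin_unlock(&my_lock);
    }

    static int my_seq_show(struct seq_file *seq, void *v)
    {
            struct my_item *item = list_entry(v, struct my_item, node);

            seq_printf(seq, "%s\n", item->name);
            return 0;
    }

    static const struct seq_operations my_seq_ops = {
            .start  = my_seq_start,
            .next   = my_seq_next,
            .stop   = my_seq_stop,
            .show   = my_seq_show,
    };
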
+++ /dev/null
-From 0787fc2c3fe6a4fcf1d43a6a48718f054fc56f3f Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Fri, 25 Aug 2023 11:16:16 +0800
-Subject: md: use separate work_struct for md_start_sync()
-
-From: Yu Kuai <yukuai3@huawei.com>
-
-[ Upstream commit ac619781967bd5663c29606246b50dbebd8b3473 ]
-
-It's a little weird to borrow 'del_work' for md_start_sync(), declare
-a new work_struct 'sync_work' for md_start_sync().
-
-Signed-off-by: Yu Kuai <yukuai3@huawei.com>
-Reviewed-by: Xiao Ni <xni@redhat.com>
-Signed-off-by: Song Liu <song@kernel.org>
-Link: https://lore.kernel.org/r/20230825031622.1530464-2-yukuai1@huaweicloud.com
-Stable-dep-of: 8d28d0ddb986 ("md/md-bitmap: Synchronize bitmap_get_stats() with bitmap lifetime")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/md/md.c | 10 ++++++----
- drivers/md/md.h | 5 ++++-
- 2 files changed, 10 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/md/md.c b/drivers/md/md.c
-index 297c86f5c70b5..4b629b7a540f7 100644
---- a/drivers/md/md.c
-+++ b/drivers/md/md.c
-@@ -682,13 +682,13 @@ void mddev_put(struct mddev *mddev)
- * flush_workqueue() after mddev_find will succeed in waiting
- * for the work to be done.
- */
-- INIT_WORK(&mddev->del_work, mddev_delayed_delete);
- queue_work(md_misc_wq, &mddev->del_work);
- }
- spin_unlock(&all_mddevs_lock);
- }
-
- static void md_safemode_timeout(struct timer_list *t);
-+static void md_start_sync(struct work_struct *ws);
-
- void mddev_init(struct mddev *mddev)
- {
-@@ -710,6 +710,9 @@ void mddev_init(struct mddev *mddev)
- mddev->resync_min = 0;
- mddev->resync_max = MaxSector;
- mddev->level = LEVEL_NONE;
-+
-+ INIT_WORK(&mddev->sync_work, md_start_sync);
-+ INIT_WORK(&mddev->del_work, mddev_delayed_delete);
- }
- EXPORT_SYMBOL_GPL(mddev_init);
-
-@@ -9308,7 +9311,7 @@ static int remove_and_add_spares(struct mddev *mddev,
-
- static void md_start_sync(struct work_struct *ws)
- {
-- struct mddev *mddev = container_of(ws, struct mddev, del_work);
-+ struct mddev *mddev = container_of(ws, struct mddev, sync_work);
-
- mddev->sync_thread = md_register_thread(md_do_sync,
- mddev,
-@@ -9516,8 +9519,7 @@ void md_check_recovery(struct mddev *mddev)
- */
- md_bitmap_write_all(mddev->bitmap);
- }
-- INIT_WORK(&mddev->del_work, md_start_sync);
-- queue_work(md_misc_wq, &mddev->del_work);
-+ queue_work(md_misc_wq, &mddev->sync_work);
- goto unlock;
- }
- not_running:
-diff --git a/drivers/md/md.h b/drivers/md/md.h
-index 4f0b480974552..c1258c94216ac 100644
---- a/drivers/md/md.h
-+++ b/drivers/md/md.h
-@@ -452,7 +452,10 @@ struct mddev {
- struct kernfs_node *sysfs_degraded; /*handle for 'degraded' */
- struct kernfs_node *sysfs_level; /*handle for 'level' */
-
-- struct work_struct del_work; /* used for delayed sysfs removal */
-+ /* used for delayed sysfs removal */
-+ struct work_struct del_work;
-+ /* used for register new sync thread */
-+ struct work_struct sync_work;
-
- /* "lock" protects:
- * flush_bio transition from NULL to !NULL
---
-2.39.5
-
--- /dev/null
+From a53e215f90079f617360439b1b6284820731e34c Mon Sep 17 00:00:00 2001
+From: Moshe Shemesh <moshe@nvidia.com>
+Date: Wed, 25 Oct 2023 20:49:59 +0300
+Subject: RDMA/mlx5: Fix mkey cache WQ flush
+
+From: Moshe Shemesh <moshe@nvidia.com>
+
+commit a53e215f90079f617360439b1b6284820731e34c upstream.
+
+The cited patch tries to ensure there are no pending works on the mkey
+cache workqueue by disabling the addition of new works and calling
+flush_workqueue(). But this workqueue also has delayed works which might
+still be waiting out their delay before being queued.
+
+Add cancel_delayed_work() for the delayed works which wait to be queued,
+and then flush_workqueue() will flush all works which are already queued
+and running.
+
+Fixes: 374012b00457 ("RDMA/mlx5: Fix mkey cache possible deadlock on cleanup")
+Link: https://lore.kernel.org/r/b8722f14e7ed81452f791764a26d2ed4cfa11478.1698256179.git.leon@kernel.org
+Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/mr.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -1007,11 +1007,13 @@ int mlx5_mkey_cache_cleanup(struct mlx5_
+ return 0;
+
+ mutex_lock(&dev->cache.rb_lock);
++ cancel_delayed_work(&dev->cache.remove_ent_dwork);
+ for (node = rb_first(root); node; node = rb_next(node)) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ent->disabled = true;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
++ cancel_delayed_work(&ent->dwork);
+ }
+ mutex_unlock(&dev->cache.rb_lock);
+
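The ordering this fix relies on, cancelling still-pending delayed work
before flushing the workqueue, follows the usual teardown pattern
sketched below. The foo_* names are placeholders; this is a generic
workqueue-API sketch under assumed structures, not the mlx5 code.

    #include <linux/workqueue.h>

    /* Placeholder device for the sketch. */
    struct foo_dev {
            struct workqueue_struct *wq;
            struct delayed_work dwork;
    };

    static void foo_teardown(struct foo_dev *dev)
    {
            /*
             * A delayed work whose timer has not expired yet is invisible
             * to flush_workqueue(); cancel it first so it cannot be queued
             * behind the flush.
             */
            cancel_delayed_work(&dev->dwork);

            /* Wait for everything that is already queued or running. */
            flush_workqueue(dev->wq);
            destroy_workqueue(dev->wq);
    }
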
--- /dev/null
+From 7ebb00cea49db641b458edef0ede389f7004821d Mon Sep 17 00:00:00 2001
+From: Michael Guralnik <michaelgur@nvidia.com>
+Date: Tue, 3 Sep 2024 14:24:50 +0300
+Subject: RDMA/mlx5: Fix MR cache temp entries cleanup
+
+From: Michael Guralnik <michaelgur@nvidia.com>
+
+commit 7ebb00cea49db641b458edef0ede389f7004821d upstream.
+
+Fix the cleanup of the temp cache entries that are dynamically created
+in the MR cache.
+
+The cleanup of the temp cache entries is currently scheduled only when a
+new entry is created. Since in the cleanup of the entries only the mkeys
+are destroyed and the cache entry stays in the cache, subsequent
+registrations might reuse the entry and it will eventually be filled with
+new mkeys without cleanup ever getting scheduled again.
+
+On workloads that register and deregister MRs with a wide range of
+properties we see the cache ends up holding many cache entries, each
+holding the max number of mkeys that were ever used through it.
+
+Additionally, as the cleanup work is scheduled to run over the whole
+cache, any mkey that is returned to the cache after the cleanup was
+scheduled will be held for less than the intended 30 seconds timeout.
+
+Solve both issues by dropping the existing remove_ent_work and reusing
+the existing per-entry work to also handle the temp entries cleanup.
+
+Schedule the work to run with a 30 seconds delay every time we push an
+mkey to a clean temp entry.
+This ensures the cleanup runs on each entry only 30 seconds after the
+first mkey was pushed to an empty entry.
+
+Since we already distinguish between persistent and temp entries when
+scheduling cache_work_func, the work is not scheduled from any other
+flow for the temp entries.
+
+Another benefit of moving to a per-entry cleanup is that we are no longer
+required to hold the rb_tree mutex, thus enabling other flows to run
+concurrently.
+
+Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
+Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
+Link: https://patch.msgid.link/e4fa4bb03bebf20dceae320f26816cd2dde23a26.1725362530.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/mlx5_ib.h | 2
+ drivers/infiniband/hw/mlx5/mr.c | 85 +++++++++++++----------------------
+ 2 files changed, 34 insertions(+), 53 deletions(-)
+
+--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+@@ -790,6 +790,7 @@ struct mlx5_cache_ent {
+ u8 is_tmp:1;
+ u8 disabled:1;
+ u8 fill_to_high_water:1;
++ u8 tmp_cleanup_scheduled:1;
+
+ /*
+ * - limit is the low water mark for stored mkeys, 2* limit is the
+@@ -821,7 +822,6 @@ struct mlx5_mkey_cache {
+ struct mutex rb_lock;
+ struct dentry *fs_root;
+ unsigned long last_add;
+- struct delayed_work remove_ent_dwork;
+ };
+
+ struct mlx5_ib_port_resources {
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -525,6 +525,23 @@ static void queue_adjust_cache_locked(st
+ }
+ }
+
++static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
++{
++ u32 mkey;
++
++ cancel_delayed_work(&ent->dwork);
++ spin_lock_irq(&ent->mkeys_queue.lock);
++ while (ent->mkeys_queue.ci) {
++ mkey = pop_mkey_locked(ent);
++ spin_unlock_irq(&ent->mkeys_queue.lock);
++ mlx5_core_destroy_mkey(dev->mdev, mkey);
++ spin_lock_irq(&ent->mkeys_queue.lock);
++ }
++ ent->tmp_cleanup_scheduled = false;
++ spin_unlock_irq(&ent->mkeys_queue.lock);
++}
++
++
+ static void __cache_work_func(struct mlx5_cache_ent *ent)
+ {
+ struct mlx5_ib_dev *dev = ent->dev;
+@@ -596,7 +613,11 @@ static void delayed_cache_work_func(stru
+ struct mlx5_cache_ent *ent;
+
+ ent = container_of(work, struct mlx5_cache_ent, dwork.work);
+- __cache_work_func(ent);
++ /* temp entries are never filled, only cleaned */
++ if (ent->is_tmp)
++ clean_keys(ent->dev, ent);
++ else
++ __cache_work_func(ent);
+ }
+
+ static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1,
+@@ -771,21 +792,6 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(s
+ return _mlx5_mr_cache_alloc(dev, ent, access_flags);
+ }
+
+-static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
+-{
+- u32 mkey;
+-
+- cancel_delayed_work(&ent->dwork);
+- spin_lock_irq(&ent->mkeys_queue.lock);
+- while (ent->mkeys_queue.ci) {
+- mkey = pop_mkey_locked(ent);
+- spin_unlock_irq(&ent->mkeys_queue.lock);
+- mlx5_core_destroy_mkey(dev->mdev, mkey);
+- spin_lock_irq(&ent->mkeys_queue.lock);
+- }
+- spin_unlock_irq(&ent->mkeys_queue.lock);
+-}
+-
+ static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
+ {
+ if (!mlx5_debugfs_root || dev->is_rep)
+@@ -898,10 +904,6 @@ mlx5r_cache_create_ent_locked(struct mlx
+ ent->limit = 0;
+
+ mlx5_mkey_cache_debugfs_add_ent(dev, ent);
+- } else {
+- mod_delayed_work(ent->dev->cache.wq,
+- &ent->dev->cache.remove_ent_dwork,
+- msecs_to_jiffies(30 * 1000));
+ }
+
+ return ent;
+@@ -912,35 +914,6 @@ mkeys_err:
+ return ERR_PTR(ret);
+ }
+
+-static void remove_ent_work_func(struct work_struct *work)
+-{
+- struct mlx5_mkey_cache *cache;
+- struct mlx5_cache_ent *ent;
+- struct rb_node *cur;
+-
+- cache = container_of(work, struct mlx5_mkey_cache,
+- remove_ent_dwork.work);
+- mutex_lock(&cache->rb_lock);
+- cur = rb_last(&cache->rb_root);
+- while (cur) {
+- ent = rb_entry(cur, struct mlx5_cache_ent, node);
+- cur = rb_prev(cur);
+- mutex_unlock(&cache->rb_lock);
+-
+- spin_lock_irq(&ent->mkeys_queue.lock);
+- if (!ent->is_tmp) {
+- spin_unlock_irq(&ent->mkeys_queue.lock);
+- mutex_lock(&cache->rb_lock);
+- continue;
+- }
+- spin_unlock_irq(&ent->mkeys_queue.lock);
+-
+- clean_keys(ent->dev, ent);
+- mutex_lock(&cache->rb_lock);
+- }
+- mutex_unlock(&cache->rb_lock);
+-}
+-
+ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
+ {
+ struct mlx5_mkey_cache *cache = &dev->cache;
+@@ -956,7 +929,6 @@ int mlx5_mkey_cache_init(struct mlx5_ib_
+ mutex_init(&dev->slow_path_mutex);
+ mutex_init(&dev->cache.rb_lock);
+ dev->cache.rb_root = RB_ROOT;
+- INIT_DELAYED_WORK(&dev->cache.remove_ent_dwork, remove_ent_work_func);
+ cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
+ if (!cache->wq) {
+ mlx5_ib_warn(dev, "failed to create work queue\n");
+@@ -1007,7 +979,6 @@ int mlx5_mkey_cache_cleanup(struct mlx5_
+ return 0;
+
+ mutex_lock(&dev->cache.rb_lock);
+- cancel_delayed_work(&dev->cache.remove_ent_dwork);
+ for (node = rb_first(root); node; node = rb_next(node)) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+@@ -1844,8 +1815,18 @@ static int mlx5_revoke_mr(struct mlx5_ib
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
+
+- if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr))
++ if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) {
++ ent = mr->mmkey.cache_ent;
++ /* upon storing to a clean temp entry - schedule its cleanup */
++ spin_lock_irq(&ent->mkeys_queue.lock);
++ if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
++ mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
++ msecs_to_jiffies(30 * 1000));
++ ent->tmp_cleanup_scheduled = true;
++ }
++ spin_unlock_irq(&ent->mkeys_queue.lock);
+ return 0;
++ }
+
+ if (ent) {
+ spin_lock_irq(&ent->mkeys_queue.lock);
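The schedule-on-first-push logic added above follows a common guard-flag
pattern around mod_delayed_work(). A stripped-down sketch, with bar_* as
placeholder names rather than the real mlx5 structures:

    #include <linux/jiffies.h>
    #include <linux/spinlock.h>
    #include <linux/workqueue.h>

    /* Placeholder cache entry for the sketch. */
    struct bar_entry {
            spinlock_t lock;
            struct delayed_work dwork;
            bool cleanup_scheduled;
    };

    static struct workqueue_struct *bar_wq;

    /* Arm the per-entry cleanup only on the first push to a clean entry. */
    static void bar_entry_push(struct bar_entry *ent)
    {
            spin_lock_irq(&ent->lock);
            /* ... store the new element in the entry ... */
            if (!ent->cleanup_scheduled) {
                    mod_delayed_work(bar_wq, &ent->dwork,
                                     msecs_to_jiffies(30 * 1000));
                    ent->cleanup_scheduled = true;
            }
            spin_unlock_irq(&ent->lock);
    }

    static void bar_entry_cleanup(struct work_struct *work)
    {
            struct bar_entry *ent =
                    container_of(work, struct bar_entry, dwork.work);

            spin_lock_irq(&ent->lock);
            /* ... release everything the entry holds ... */
            ent->cleanup_scheduled = false; /* the next push re-arms it */
            spin_unlock_irq(&ent->lock);
    }
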
--- /dev/null
+From c99a7457e5bb873914a74307ba2df85f6799203b Mon Sep 17 00:00:00 2001
+From: Leon Romanovsky <leonro@nvidia.com>
+Date: Thu, 28 Sep 2023 20:20:47 +0300
+Subject: RDMA/mlx5: Remove not-used cache disable flag
+
+From: Leon Romanovsky <leonro@nvidia.com>
+
+commit c99a7457e5bb873914a74307ba2df85f6799203b upstream.
+
+During execution of mlx5_mkey_cache_cleanup(), there is a guarantee
+that MRs are not registered and/or destroyed. It means that we don't
+need the newly introduced cache disable flag.
+
+Fixes: 374012b00457 ("RDMA/mlx5: Fix mkey cache possible deadlock on cleanup")
+Link: https://lore.kernel.org/r/c7e9c9f98c8ae4a7413d97d9349b29f5b0a23dbe.1695921626.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 -
+ drivers/infiniband/hw/mlx5/mr.c | 5 -----
+ 2 files changed, 6 deletions(-)
+
+--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+@@ -822,7 +822,6 @@ struct mlx5_mkey_cache {
+ struct dentry *fs_root;
+ unsigned long last_add;
+ struct delayed_work remove_ent_dwork;
+- u8 disable: 1;
+ };
+
+ struct mlx5_ib_port_resources {
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -1007,7 +1007,6 @@ int mlx5_mkey_cache_cleanup(struct mlx5_
+ return 0;
+
+ mutex_lock(&dev->cache.rb_lock);
+- dev->cache.disable = true;
+ for (node = rb_first(root); node; node = rb_next(node)) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+@@ -1810,10 +1809,6 @@ static int cache_ent_find_and_store(stru
+ }
+
+ mutex_lock(&cache->rb_lock);
+- if (cache->disable) {
+- mutex_unlock(&cache->rb_lock);
+- return 0;
+- }
+ ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key);
+ if (ent) {
+ if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) {
arm64-mte-do-not-allow-prot_mte-on-map_hugetlb-user-mappings.patch
-md-use-separate-work_struct-for-md_start_sync.patch
-md-factor-out-a-helper-from-mddev_put.patch
-md-simplify-md_seq_ops.patch
md-md-bitmap-replace-md_bitmap_status-with-a-new-hel.patch
md-md-cluster-fix-spares-warnings-for-__le64.patch
md-md-bitmap-add-sync_size-into-struct-md_bitmap_sta.patch
rdma-mlx5-ensure-created-mkeys-always-have-a-populated-rb_key.patch
rdma-mlx5-fix-counter-update-on-mr-cache-mkey-creation.patch
rdma-mlx5-limit-usage-of-over-sized-mkeys-from-the-mr-cache.patch
+rdma-mlx5-remove-not-used-cache-disable-flag.patch
+rdma-mlx5-fix-mkey-cache-wq-flush.patch
+rdma-mlx5-fix-mr-cache-temp-entries-cleanup.patch