From: Greg Kroah-Hartman Date: Thu, 6 Mar 2025 14:20:43 +0000 (+0100) Subject: 6.1-stable patches X-Git-Tag: v6.6.81~2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5bd9a4fedc2579df2a91e19fff97a986cbed9339;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: rdma-mlx5-fix-mkey-cache-wq-flush.patch rdma-mlx5-fix-mr-cache-temp-entries-cleanup.patch rdma-mlx5-remove-not-used-cache-disable-flag.patch --- diff --git a/queue-6.1/md-factor-out-a-helper-from-mddev_put.patch b/queue-6.1/md-factor-out-a-helper-from-mddev_put.patch deleted file mode 100644 index b0a2a9c7ad..0000000000 --- a/queue-6.1/md-factor-out-a-helper-from-mddev_put.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 15dee1c356175330d3f60d2cf3ab62b39be88ab7 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Wed, 27 Sep 2023 14:12:40 +0800 -Subject: md: factor out a helper from mddev_put() - -From: Yu Kuai - -[ Upstream commit 3d8d32873c7b6d9cec5b40c2ddb8c7c55961694f ] - -There are no functional changes, prepare to simplify md_seq_ops in next -patch. - -Signed-off-by: Yu Kuai -Signed-off-by: Song Liu -Link: https://lore.kernel.org/r/20230927061241.1552837-2-yukuai1@huaweicloud.com -Stable-dep-of: 8d28d0ddb986 ("md/md-bitmap: Synchronize bitmap_get_stats() with bitmap lifetime") -Signed-off-by: Sasha Levin ---- - drivers/md/md.c | 29 +++++++++++++++++------------ - 1 file changed, 17 insertions(+), 12 deletions(-) - -diff --git a/drivers/md/md.c b/drivers/md/md.c -index 4b629b7a540f7..44bac1e7d47e2 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -667,23 +667,28 @@ static inline struct mddev *mddev_get(struct mddev *mddev) - - static void mddev_delayed_delete(struct work_struct *ws); - -+static void __mddev_put(struct mddev *mddev) -+{ -+ if (mddev->raid_disks || !list_empty(&mddev->disks) || -+ mddev->ctime || mddev->hold_active) -+ return; -+ -+ /* Array is not configured at all, and not held active, so destroy it */ -+ set_bit(MD_DELETED, &mddev->flags); -+ -+ /* -+ * Call queue_work inside the spinlock so that flush_workqueue() after -+ * mddev_find will succeed in waiting for the work to be done. -+ */ -+ queue_work(md_misc_wq, &mddev->del_work); -+} -+ - void mddev_put(struct mddev *mddev) - { - if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) - return; -- if (!mddev->raid_disks && list_empty(&mddev->disks) && -- mddev->ctime == 0 && !mddev->hold_active) { -- /* Array is not configured at all, and not held active, -- * so destroy it */ -- set_bit(MD_DELETED, &mddev->flags); - -- /* -- * Call queue_work inside the spinlock so that -- * flush_workqueue() after mddev_find will succeed in waiting -- * for the work to be done. 
-- */ -- queue_work(md_misc_wq, &mddev->del_work); -- } -+ __mddev_put(mddev); - spin_unlock(&all_mddevs_lock); - } - --- -2.39.5 - diff --git a/queue-6.1/md-md-bitmap-replace-md_bitmap_status-with-a-new-hel.patch b/queue-6.1/md-md-bitmap-replace-md_bitmap_status-with-a-new-hel.patch index f22c218d48..a73a659f00 100644 --- a/queue-6.1/md-md-bitmap-replace-md_bitmap_status-with-a-new-hel.patch +++ b/queue-6.1/md-md-bitmap-replace-md_bitmap_status-with-a-new-hel.patch @@ -18,16 +18,14 @@ Signed-off-by: Song Liu Stable-dep-of: 8d28d0ddb986 ("md/md-bitmap: Synchronize bitmap_get_stats() with bitmap lifetime") Signed-off-by: Sasha Levin --- - drivers/md/md-bitmap.c | 25 ++++++------------------- - drivers/md/md-bitmap.h | 8 +++++++- - drivers/md/md.c | 29 ++++++++++++++++++++++++++++- + drivers/md/md-bitmap.c | 25 ++++++------------------- + drivers/md/md-bitmap.h | 8 +++++++- + drivers/md/md.c | 29 ++++++++++++++++++++++++++++- 3 files changed, 41 insertions(+), 21 deletions(-) -diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c -index 9d8ac04c23462..736268447d3e1 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c -@@ -2022,32 +2022,19 @@ int md_bitmap_copy_from_slot(struct mddev *mddev, int slot, +@@ -2022,32 +2022,19 @@ int md_bitmap_copy_from_slot(struct mdde } EXPORT_SYMBOL_GPL(md_bitmap_copy_from_slot); @@ -66,8 +64,6 @@ index 9d8ac04c23462..736268447d3e1 100644 } int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, -diff --git a/drivers/md/md-bitmap.h b/drivers/md/md-bitmap.h -index 3a4750952b3a7..00ac4c3ecf4d9 100644 --- a/drivers/md/md-bitmap.h +++ b/drivers/md/md-bitmap.h @@ -233,6 +233,12 @@ struct bitmap { @@ -83,7 +79,7 @@ index 3a4750952b3a7..00ac4c3ecf4d9 100644 /* the bitmap API */ /* these are used only by md/bitmap */ -@@ -243,7 +249,7 @@ void md_bitmap_destroy(struct mddev *mddev); +@@ -243,7 +249,7 @@ void md_bitmap_destroy(struct mddev *mdd void md_bitmap_print_sb(struct bitmap *bitmap); void md_bitmap_update_sb(struct bitmap *bitmap); @@ -92,12 +88,10 @@ index 3a4750952b3a7..00ac4c3ecf4d9 100644 int md_bitmap_setallbits(struct bitmap *bitmap); void md_bitmap_write_all(struct bitmap *bitmap); -diff --git a/drivers/md/md.c b/drivers/md/md.c -index 743244b06f679..887479e0d3afe 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c -@@ -8280,6 +8280,33 @@ static void md_seq_stop(struct seq_file *seq, void *v) - spin_unlock(&all_mddevs_lock); +@@ -8318,6 +8318,33 @@ static void md_seq_stop(struct seq_file + mddev_put(mddev); } +static void md_bitmap_status(struct seq_file *seq, struct mddev *mddev) @@ -129,8 +123,8 @@ index 743244b06f679..887479e0d3afe 100644 + static int md_seq_show(struct seq_file *seq, void *v) { - struct mddev *mddev = list_entry(v, struct mddev, all_mddevs); -@@ -8355,7 +8382,7 @@ static int md_seq_show(struct seq_file *seq, void *v) + struct mddev *mddev = v; +@@ -8406,7 +8433,7 @@ static int md_seq_show(struct seq_file * } else seq_printf(seq, "\n "); @@ -139,6 +133,3 @@ index 743244b06f679..887479e0d3afe 100644 seq_printf(seq, "\n"); } --- -2.39.5 - diff --git a/queue-6.1/md-md-bitmap-synchronize-bitmap_get_stats-with-bitma.patch b/queue-6.1/md-md-bitmap-synchronize-bitmap_get_stats-with-bitma.patch index 63c00b5179..faeab336e7 100644 --- a/queue-6.1/md-md-bitmap-synchronize-bitmap_get_stats-with-bitma.patch +++ b/queue-6.1/md-md-bitmap-synchronize-bitmap_get_stats-with-bitma.patch @@ -40,15 +40,13 @@ Link: https://lore.kernel.org/r/20250124092055.4050195-1-yukuai1@huaweicloud.com Signed-off-by: Song Liu Signed-off-by: Sasha 
Levin --- - drivers/md/md-bitmap.c | 5 ++++- - drivers/md/md.c | 5 +++++ - 2 files changed, 9 insertions(+), 1 deletion(-) + drivers/md/md-bitmap.c | 5 ++++- + drivers/md/md.c | 4 ++++ + 2 files changed, 8 insertions(+), 1 deletion(-) -diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c -index bddf4f3d27a77..e18e21b24210d 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c -@@ -2029,7 +2029,10 @@ int md_bitmap_get_stats(struct bitmap *bitmap, struct md_bitmap_stats *stats) +@@ -2029,7 +2029,10 @@ int md_bitmap_get_stats(struct bitmap *b if (!bitmap) return -ENOENT; @@ -60,29 +58,23 @@ index bddf4f3d27a77..e18e21b24210d 100644 sb = kmap_local_page(bitmap->storage.sb_page); stats->sync_size = le64_to_cpu(sb->sync_size); kunmap_local(sb); -diff --git a/drivers/md/md.c b/drivers/md/md.c -index 887479e0d3afe..e2a3a1e1afca0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c -@@ -8317,6 +8317,10 @@ static int md_seq_show(struct seq_file *seq, void *v) +@@ -8368,6 +8368,9 @@ static int md_seq_show(struct seq_file * return 0; + } - spin_unlock(&all_mddevs_lock); -+ + /* prevent bitmap to be freed after checking */ + mutex_lock(&mddev->bitmap_info.mutex); + spin_lock(&mddev->lock); if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) { seq_printf(seq, "%s : %sactive", mdname(mddev), -@@ -8387,6 +8391,7 @@ static int md_seq_show(struct seq_file *seq, void *v) +@@ -8438,6 +8441,7 @@ static int md_seq_show(struct seq_file * seq_printf(seq, "\n"); } spin_unlock(&mddev->lock); + mutex_unlock(&mddev->bitmap_info.mutex); - spin_lock(&all_mddevs_lock); - if (atomic_dec_and_test(&mddev->active)) - __mddev_put(mddev); --- -2.39.5 - + + return 0; + } diff --git a/queue-6.1/md-md-cluster-fix-spares-warnings-for-__le64.patch b/queue-6.1/md-md-cluster-fix-spares-warnings-for-__le64.patch index caab2659a1..2e188ed674 100644 --- a/queue-6.1/md-md-cluster-fix-spares-warnings-for-__le64.patch +++ b/queue-6.1/md-md-cluster-fix-spares-warnings-for-__le64.patch @@ -23,14 +23,12 @@ Signed-off-by: Song Liu Stable-dep-of: 8d28d0ddb986 ("md/md-bitmap: Synchronize bitmap_get_stats() with bitmap lifetime") Signed-off-by: Sasha Levin --- - drivers/md/md-cluster.c | 6 +++--- + drivers/md/md-cluster.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) -diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c -index 10e0c5381d01b..a0d3f6c397707 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c -@@ -1195,7 +1195,7 @@ static int cluster_check_sync_size(struct mddev *mddev) +@@ -1195,7 +1195,7 @@ static int cluster_check_sync_size(struc struct dlm_lock_resource *bm_lockres; sb = kmap_atomic(bitmap->storage.sb_page); @@ -39,7 +37,7 @@ index 10e0c5381d01b..a0d3f6c397707 100644 kunmap_atomic(sb); for (i = 0; i < node_num; i++) { -@@ -1227,8 +1227,8 @@ static int cluster_check_sync_size(struct mddev *mddev) +@@ -1227,8 +1227,8 @@ static int cluster_check_sync_size(struc sb = kmap_atomic(bitmap->storage.sb_page); if (sync_size == 0) @@ -50,6 +48,3 @@ index 10e0c5381d01b..a0d3f6c397707 100644 kunmap_atomic(sb); md_bitmap_free(bitmap); return -1; --- -2.39.5 - diff --git a/queue-6.1/md-simplify-md_seq_ops.patch b/queue-6.1/md-simplify-md_seq_ops.patch deleted file mode 100644 index 46a2e651c2..0000000000 --- a/queue-6.1/md-simplify-md_seq_ops.patch +++ /dev/null @@ -1,181 +0,0 @@ -From 967a2f6f9119288165ecd4de0f01a2d0ed4a887c Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Wed, 27 Sep 2023 14:12:41 +0800 -Subject: md: simplify md_seq_ops - -From: Yu Kuai - -[ Upstream 
commit cf1b6d4441fffd0ba8ae4ced6a12f578c95ca049 ] - -Before this patch, the implementation is hacky and hard to understand: - -1) md_seq_start set pos to 1; -2) md_seq_show found pos is 1, then print Personalities; -3) md_seq_next found pos is 1, then it update pos to the first mddev; -4) md_seq_show found pos is not 1 or 2, show mddev; -5) md_seq_next found pos is not 1 or 2, update pos to next mddev; -6) loop 4-5 until the last mddev, then md_seq_next update pos to 2; -7) md_seq_show found pos is 2, then print unused devices; -8) md_seq_next found pos is 2, stop; - -This patch remove the magic value and use seq_list_start/next/stop() -directly, and move printing "Personalities" to md_seq_start(), -"unsed devices" to md_seq_stop(): - -1) md_seq_start print Personalities, and then set pos to first mddev; -2) md_seq_show show mddev; -3) md_seq_next update pos to next mddev; -4) loop 2-3 until the last mddev; -5) md_seq_stop print unsed devices; - -Signed-off-by: Yu Kuai -Signed-off-by: Song Liu -Link: https://lore.kernel.org/r/20230927061241.1552837-3-yukuai1@huaweicloud.com -Stable-dep-of: 8d28d0ddb986 ("md/md-bitmap: Synchronize bitmap_get_stats() with bitmap lifetime") -Signed-off-by: Sasha Levin ---- - drivers/md/md.c | 100 +++++++++++------------------------------------- - 1 file changed, 22 insertions(+), 78 deletions(-) - -diff --git a/drivers/md/md.c b/drivers/md/md.c -index 44bac1e7d47e2..743244b06f679 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -8250,105 +8250,46 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev) - } - - static void *md_seq_start(struct seq_file *seq, loff_t *pos) -+ __acquires(&all_mddevs_lock) - { -- struct list_head *tmp; -- loff_t l = *pos; -- struct mddev *mddev; -+ struct md_personality *pers; - -- if (l == 0x10000) { -- ++*pos; -- return (void *)2; -- } -- if (l > 0x10000) -- return NULL; -- if (!l--) -- /* header */ -- return (void*)1; -+ seq_puts(seq, "Personalities : "); -+ spin_lock(&pers_lock); -+ list_for_each_entry(pers, &pers_list, list) -+ seq_printf(seq, "[%s] ", pers->name); -+ -+ spin_unlock(&pers_lock); -+ seq_puts(seq, "\n"); -+ seq->poll_event = atomic_read(&md_event_count); - - spin_lock(&all_mddevs_lock); -- list_for_each(tmp,&all_mddevs) -- if (!l--) { -- mddev = list_entry(tmp, struct mddev, all_mddevs); -- if (!mddev_get(mddev)) -- continue; -- spin_unlock(&all_mddevs_lock); -- return mddev; -- } -- spin_unlock(&all_mddevs_lock); -- if (!l--) -- return (void*)2;/* tail */ -- return NULL; -+ -+ return seq_list_start(&all_mddevs, *pos); - } - - static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos) - { -- struct list_head *tmp; -- struct mddev *next_mddev, *mddev = v; -- struct mddev *to_put = NULL; -- -- ++*pos; -- if (v == (void*)2) -- return NULL; -- -- spin_lock(&all_mddevs_lock); -- if (v == (void*)1) { -- tmp = all_mddevs.next; -- } else { -- to_put = mddev; -- tmp = mddev->all_mddevs.next; -- } -- -- for (;;) { -- if (tmp == &all_mddevs) { -- next_mddev = (void*)2; -- *pos = 0x10000; -- break; -- } -- next_mddev = list_entry(tmp, struct mddev, all_mddevs); -- if (mddev_get(next_mddev)) -- break; -- mddev = next_mddev; -- tmp = mddev->all_mddevs.next; -- } -- spin_unlock(&all_mddevs_lock); -- -- if (to_put) -- mddev_put(to_put); -- return next_mddev; -- -+ return seq_list_next(v, &all_mddevs, pos); - } - - static void md_seq_stop(struct seq_file *seq, void *v) -+ __releases(&all_mddevs_lock) - { -- struct mddev *mddev = v; -- -- if (mddev && v != (void*)1 && v != (void*)2) -- 
mddev_put(mddev); -+ status_unused(seq); -+ spin_unlock(&all_mddevs_lock); - } - - static int md_seq_show(struct seq_file *seq, void *v) - { -- struct mddev *mddev = v; -+ struct mddev *mddev = list_entry(v, struct mddev, all_mddevs); - sector_t sectors; - struct md_rdev *rdev; - -- if (v == (void*)1) { -- struct md_personality *pers; -- seq_printf(seq, "Personalities : "); -- spin_lock(&pers_lock); -- list_for_each_entry(pers, &pers_list, list) -- seq_printf(seq, "[%s] ", pers->name); -- -- spin_unlock(&pers_lock); -- seq_printf(seq, "\n"); -- seq->poll_event = atomic_read(&md_event_count); -+ if (!mddev_get(mddev)) - return 0; -- } -- if (v == (void*)2) { -- status_unused(seq); -- return 0; -- } - -+ spin_unlock(&all_mddevs_lock); - spin_lock(&mddev->lock); - if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) { - seq_printf(seq, "%s : %sactive", mdname(mddev), -@@ -8419,6 +8360,9 @@ static int md_seq_show(struct seq_file *seq, void *v) - seq_printf(seq, "\n"); - } - spin_unlock(&mddev->lock); -+ spin_lock(&all_mddevs_lock); -+ if (atomic_dec_and_test(&mddev->active)) -+ __mddev_put(mddev); - - return 0; - } --- -2.39.5 - diff --git a/queue-6.1/md-use-separate-work_struct-for-md_start_sync.patch b/queue-6.1/md-use-separate-work_struct-for-md_start_sync.patch deleted file mode 100644 index 2b1ccb0270..0000000000 --- a/queue-6.1/md-use-separate-work_struct-for-md_start_sync.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 0787fc2c3fe6a4fcf1d43a6a48718f054fc56f3f Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Fri, 25 Aug 2023 11:16:16 +0800 -Subject: md: use separate work_struct for md_start_sync() - -From: Yu Kuai - -[ Upstream commit ac619781967bd5663c29606246b50dbebd8b3473 ] - -It's a little weird to borrow 'del_work' for md_start_sync(), declare -a new work_struct 'sync_work' for md_start_sync(). - -Signed-off-by: Yu Kuai -Reviewed-by: Xiao Ni -Signed-off-by: Song Liu -Link: https://lore.kernel.org/r/20230825031622.1530464-2-yukuai1@huaweicloud.com -Stable-dep-of: 8d28d0ddb986 ("md/md-bitmap: Synchronize bitmap_get_stats() with bitmap lifetime") -Signed-off-by: Sasha Levin ---- - drivers/md/md.c | 10 ++++++---- - drivers/md/md.h | 5 ++++- - 2 files changed, 10 insertions(+), 5 deletions(-) - -diff --git a/drivers/md/md.c b/drivers/md/md.c -index 297c86f5c70b5..4b629b7a540f7 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -682,13 +682,13 @@ void mddev_put(struct mddev *mddev) - * flush_workqueue() after mddev_find will succeed in waiting - * for the work to be done. 
- */ -- INIT_WORK(&mddev->del_work, mddev_delayed_delete); - queue_work(md_misc_wq, &mddev->del_work); - } - spin_unlock(&all_mddevs_lock); - } - - static void md_safemode_timeout(struct timer_list *t); -+static void md_start_sync(struct work_struct *ws); - - void mddev_init(struct mddev *mddev) - { -@@ -710,6 +710,9 @@ void mddev_init(struct mddev *mddev) - mddev->resync_min = 0; - mddev->resync_max = MaxSector; - mddev->level = LEVEL_NONE; -+ -+ INIT_WORK(&mddev->sync_work, md_start_sync); -+ INIT_WORK(&mddev->del_work, mddev_delayed_delete); - } - EXPORT_SYMBOL_GPL(mddev_init); - -@@ -9308,7 +9311,7 @@ static int remove_and_add_spares(struct mddev *mddev, - - static void md_start_sync(struct work_struct *ws) - { -- struct mddev *mddev = container_of(ws, struct mddev, del_work); -+ struct mddev *mddev = container_of(ws, struct mddev, sync_work); - - mddev->sync_thread = md_register_thread(md_do_sync, - mddev, -@@ -9516,8 +9519,7 @@ void md_check_recovery(struct mddev *mddev) - */ - md_bitmap_write_all(mddev->bitmap); - } -- INIT_WORK(&mddev->del_work, md_start_sync); -- queue_work(md_misc_wq, &mddev->del_work); -+ queue_work(md_misc_wq, &mddev->sync_work); - goto unlock; - } - not_running: -diff --git a/drivers/md/md.h b/drivers/md/md.h -index 4f0b480974552..c1258c94216ac 100644 ---- a/drivers/md/md.h -+++ b/drivers/md/md.h -@@ -452,7 +452,10 @@ struct mddev { - struct kernfs_node *sysfs_degraded; /*handle for 'degraded' */ - struct kernfs_node *sysfs_level; /*handle for 'level' */ - -- struct work_struct del_work; /* used for delayed sysfs removal */ -+ /* used for delayed sysfs removal */ -+ struct work_struct del_work; -+ /* used for register new sync thread */ -+ struct work_struct sync_work; - - /* "lock" protects: - * flush_bio transition from NULL to !NULL --- -2.39.5 - diff --git a/queue-6.1/rdma-mlx5-fix-mkey-cache-wq-flush.patch b/queue-6.1/rdma-mlx5-fix-mkey-cache-wq-flush.patch new file mode 100644 index 0000000000..c28bcf45ab --- /dev/null +++ b/queue-6.1/rdma-mlx5-fix-mkey-cache-wq-flush.patch @@ -0,0 +1,44 @@ +From a53e215f90079f617360439b1b6284820731e34c Mon Sep 17 00:00:00 2001 +From: Moshe Shemesh +Date: Wed, 25 Oct 2023 20:49:59 +0300 +Subject: RDMA/mlx5: Fix mkey cache WQ flush + +From: Moshe Shemesh + +commit a53e215f90079f617360439b1b6284820731e34c upstream. + +The cited patch tries to ensure no pending works on the mkey cache +workqueue by disabling adding new works and call flush_workqueue(). +But this workqueue also has delayed works which might still be pending +the delay time to be queued. + +Add cancel_delayed_work() for the delayed works which waits to be queued +and then the flush_workqueue() will flush all works which are already +queued and running. 
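+
+As a minimal sketch of the ordering (illustrative names, not the
+actual mlx5 symbols), assuming a driver-owned workqueue with one
+delayed work item:
+
+	static void cache_drain(struct workqueue_struct *wq,
+				struct delayed_work *dwork)
+	{
+		/*
+		 * A delayed work still waiting on its timer is not yet
+		 * queued, so flush_workqueue() alone would not wait for
+		 * it.  Cancelling the timer first (which also drops a
+		 * queued-but-not-running instance) closes that window;
+		 * the flush then waits out anything still running.
+		 */
+		cancel_delayed_work(dwork);
+		flush_workqueue(wq);
+	}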
+ +Fixes: 374012b00457 ("RDMA/mlx5: Fix mkey cache possible deadlock on cleanup") +Link: https://lore.kernel.org/r/b8722f14e7ed81452f791764a26d2ed4cfa11478.1698256179.git.leon@kernel.org +Signed-off-by: Moshe Shemesh +Signed-off-by: Leon Romanovsky +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/mlx5/mr.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/infiniband/hw/mlx5/mr.c ++++ b/drivers/infiniband/hw/mlx5/mr.c +@@ -1007,11 +1007,13 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ + return 0; + + mutex_lock(&dev->cache.rb_lock); ++ cancel_delayed_work(&dev->cache.remove_ent_dwork); + for (node = rb_first(root); node; node = rb_next(node)) { + ent = rb_entry(node, struct mlx5_cache_ent, node); + spin_lock_irq(&ent->mkeys_queue.lock); + ent->disabled = true; + spin_unlock_irq(&ent->mkeys_queue.lock); ++ cancel_delayed_work(&ent->dwork); + } + mutex_unlock(&dev->cache.rb_lock); + diff --git a/queue-6.1/rdma-mlx5-fix-mr-cache-temp-entries-cleanup.patch b/queue-6.1/rdma-mlx5-fix-mr-cache-temp-entries-cleanup.patch new file mode 100644 index 0000000000..223ad4d579 --- /dev/null +++ b/queue-6.1/rdma-mlx5-fix-mr-cache-temp-entries-cleanup.patch @@ -0,0 +1,214 @@ +From 7ebb00cea49db641b458edef0ede389f7004821d Mon Sep 17 00:00:00 2001 +From: Michael Guralnik +Date: Tue, 3 Sep 2024 14:24:50 +0300 +Subject: RDMA/mlx5: Fix MR cache temp entries cleanup + +From: Michael Guralnik + +commit 7ebb00cea49db641b458edef0ede389f7004821d upstream. + +Fix the cleanup of the temp cache entries that are dynamically created +in the MR cache. + +The cleanup of the temp cache entries is currently scheduled only when a +new entry is created. Since in the cleanup of the entries only the mkeys +are destroyed and the cache entry stays in the cache, subsequent +registrations might reuse the entry and it will eventually be filled with +new mkeys without cleanup ever getting scheduled again. + +On workloads that register and deregister MRs with a wide range of +properties we see the cache ends up holding many cache entries, each +holding the max number of mkeys that were ever used through it. + +Additionally, as the cleanup work is scheduled to run over the whole +cache, any mkey that is returned to the cache after the cleanup was +scheduled will be held for less than the intended 30 seconds timeout. + +Solve both issues by dropping the existing remove_ent_work and reusing +the existing per-entry work to also handle the temp entries cleanup. + +Schedule the work to run with a 30 seconds delay every time we push an +mkey to a clean temp entry. +This ensures the cleanup runs on each entry only 30 seconds after the +first mkey was pushed to an empty entry. + +As we have already been distinguishing between persistent and temp entries +when scheduling the cache_work_func, it is not being scheduled in any +other flows for the temp entries. + +Another benefit from moving to a per-entry cleanup is we now not +required to hold the rb_tree mutex, thus enabling other flow to run +concurrently. 
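+
+A condensed sketch of the new scheduling rule (taken from the
+mlx5_revoke_mr() hunk below; error handling and the surrounding
+dereg flow are omitted):
+
+	/* On storing an mkey into a clean temp entry, arm a one-shot
+	 * cleanup 30 seconds out; the flag prevents re-arming on
+	 * every subsequent push.
+	 */
+	spin_lock_irq(&ent->mkeys_queue.lock);
+	if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
+		mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
+				 msecs_to_jiffies(30 * 1000));
+		ent->tmp_cleanup_scheduled = true;
+	}
+	spin_unlock_irq(&ent->mkeys_queue.lock);
+
+The delayed handler (see delayed_cache_work_func() below) empties the
+entry, clears tmp_cleanup_scheduled and leaves the entry in the tree,
+so the next mkey stored into the now-clean entry re-arms a full
+30-second window.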
+ +Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow") +Signed-off-by: Michael Guralnik +Link: https://patch.msgid.link/e4fa4bb03bebf20dceae320f26816cd2dde23a26.1725362530.git.leon@kernel.org +Signed-off-by: Leon Romanovsky +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 + drivers/infiniband/hw/mlx5/mr.c | 85 +++++++++++++---------------------- + 2 files changed, 34 insertions(+), 53 deletions(-) + +--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h ++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h +@@ -790,6 +790,7 @@ struct mlx5_cache_ent { + u8 is_tmp:1; + u8 disabled:1; + u8 fill_to_high_water:1; ++ u8 tmp_cleanup_scheduled:1; + + /* + * - limit is the low water mark for stored mkeys, 2* limit is the +@@ -821,7 +822,6 @@ struct mlx5_mkey_cache { + struct mutex rb_lock; + struct dentry *fs_root; + unsigned long last_add; +- struct delayed_work remove_ent_dwork; + }; + + struct mlx5_ib_port_resources { +--- a/drivers/infiniband/hw/mlx5/mr.c ++++ b/drivers/infiniband/hw/mlx5/mr.c +@@ -525,6 +525,23 @@ static void queue_adjust_cache_locked(st + } + } + ++static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) ++{ ++ u32 mkey; ++ ++ cancel_delayed_work(&ent->dwork); ++ spin_lock_irq(&ent->mkeys_queue.lock); ++ while (ent->mkeys_queue.ci) { ++ mkey = pop_mkey_locked(ent); ++ spin_unlock_irq(&ent->mkeys_queue.lock); ++ mlx5_core_destroy_mkey(dev->mdev, mkey); ++ spin_lock_irq(&ent->mkeys_queue.lock); ++ } ++ ent->tmp_cleanup_scheduled = false; ++ spin_unlock_irq(&ent->mkeys_queue.lock); ++} ++ ++ + static void __cache_work_func(struct mlx5_cache_ent *ent) + { + struct mlx5_ib_dev *dev = ent->dev; +@@ -596,7 +613,11 @@ static void delayed_cache_work_func(stru + struct mlx5_cache_ent *ent; + + ent = container_of(work, struct mlx5_cache_ent, dwork.work); +- __cache_work_func(ent); ++ /* temp entries are never filled, only cleaned */ ++ if (ent->is_tmp) ++ clean_keys(ent->dev, ent); ++ else ++ __cache_work_func(ent); + } + + static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1, +@@ -771,21 +792,6 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(s + return _mlx5_mr_cache_alloc(dev, ent, access_flags); + } + +-static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) +-{ +- u32 mkey; +- +- cancel_delayed_work(&ent->dwork); +- spin_lock_irq(&ent->mkeys_queue.lock); +- while (ent->mkeys_queue.ci) { +- mkey = pop_mkey_locked(ent); +- spin_unlock_irq(&ent->mkeys_queue.lock); +- mlx5_core_destroy_mkey(dev->mdev, mkey); +- spin_lock_irq(&ent->mkeys_queue.lock); +- } +- spin_unlock_irq(&ent->mkeys_queue.lock); +-} +- + static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) + { + if (!mlx5_debugfs_root || dev->is_rep) +@@ -898,10 +904,6 @@ mlx5r_cache_create_ent_locked(struct mlx + ent->limit = 0; + + mlx5_mkey_cache_debugfs_add_ent(dev, ent); +- } else { +- mod_delayed_work(ent->dev->cache.wq, +- &ent->dev->cache.remove_ent_dwork, +- msecs_to_jiffies(30 * 1000)); + } + + return ent; +@@ -912,35 +914,6 @@ mkeys_err: + return ERR_PTR(ret); + } + +-static void remove_ent_work_func(struct work_struct *work) +-{ +- struct mlx5_mkey_cache *cache; +- struct mlx5_cache_ent *ent; +- struct rb_node *cur; +- +- cache = container_of(work, struct mlx5_mkey_cache, +- remove_ent_dwork.work); +- mutex_lock(&cache->rb_lock); +- cur = rb_last(&cache->rb_root); +- while (cur) { +- ent = rb_entry(cur, struct mlx5_cache_ent, node); +- cur = rb_prev(cur); +- mutex_unlock(&cache->rb_lock); +- +- 
spin_lock_irq(&ent->mkeys_queue.lock); +- if (!ent->is_tmp) { +- spin_unlock_irq(&ent->mkeys_queue.lock); +- mutex_lock(&cache->rb_lock); +- continue; +- } +- spin_unlock_irq(&ent->mkeys_queue.lock); +- +- clean_keys(ent->dev, ent); +- mutex_lock(&cache->rb_lock); +- } +- mutex_unlock(&cache->rb_lock); +-} +- + int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) + { + struct mlx5_mkey_cache *cache = &dev->cache; +@@ -956,7 +929,6 @@ int mlx5_mkey_cache_init(struct mlx5_ib_ + mutex_init(&dev->slow_path_mutex); + mutex_init(&dev->cache.rb_lock); + dev->cache.rb_root = RB_ROOT; +- INIT_DELAYED_WORK(&dev->cache.remove_ent_dwork, remove_ent_work_func); + cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); + if (!cache->wq) { + mlx5_ib_warn(dev, "failed to create work queue\n"); +@@ -1007,7 +979,6 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ + return 0; + + mutex_lock(&dev->cache.rb_lock); +- cancel_delayed_work(&dev->cache.remove_ent_dwork); + for (node = rb_first(root); node; node = rb_next(node)) { + ent = rb_entry(node, struct mlx5_cache_ent, node); + spin_lock_irq(&ent->mkeys_queue.lock); +@@ -1844,8 +1815,18 @@ static int mlx5_revoke_mr(struct mlx5_ib + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; + +- if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) ++ if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) { ++ ent = mr->mmkey.cache_ent; ++ /* upon storing to a clean temp entry - schedule its cleanup */ ++ spin_lock_irq(&ent->mkeys_queue.lock); ++ if (ent->is_tmp && !ent->tmp_cleanup_scheduled) { ++ mod_delayed_work(ent->dev->cache.wq, &ent->dwork, ++ msecs_to_jiffies(30 * 1000)); ++ ent->tmp_cleanup_scheduled = true; ++ } ++ spin_unlock_irq(&ent->mkeys_queue.lock); + return 0; ++ } + + if (ent) { + spin_lock_irq(&ent->mkeys_queue.lock); diff --git a/queue-6.1/rdma-mlx5-remove-not-used-cache-disable-flag.patch b/queue-6.1/rdma-mlx5-remove-not-used-cache-disable-flag.patch new file mode 100644 index 0000000000..0a384b91e9 --- /dev/null +++ b/queue-6.1/rdma-mlx5-remove-not-used-cache-disable-flag.patch @@ -0,0 +1,53 @@ +From c99a7457e5bb873914a74307ba2df85f6799203b Mon Sep 17 00:00:00 2001 +From: Leon Romanovsky +Date: Thu, 28 Sep 2023 20:20:47 +0300 +Subject: RDMA/mlx5: Remove not-used cache disable flag + +From: Leon Romanovsky + +commit c99a7457e5bb873914a74307ba2df85f6799203b upstream. + +During execution of mlx5_mkey_cache_cleanup(), there is a guarantee +that MR are not registered and/or destroyed. It means that we don't +need newly introduced cache disable flag. 
+ +Fixes: 374012b00457 ("RDMA/mlx5: Fix mkey cache possible deadlock on cleanup") +Link: https://lore.kernel.org/r/c7e9c9f98c8ae4a7413d97d9349b29f5b0a23dbe.1695921626.git.leon@kernel.org +Signed-off-by: Leon Romanovsky +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - + drivers/infiniband/hw/mlx5/mr.c | 5 ----- + 2 files changed, 6 deletions(-) + +--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h ++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h +@@ -822,7 +822,6 @@ struct mlx5_mkey_cache { + struct dentry *fs_root; + unsigned long last_add; + struct delayed_work remove_ent_dwork; +- u8 disable: 1; + }; + + struct mlx5_ib_port_resources { +--- a/drivers/infiniband/hw/mlx5/mr.c ++++ b/drivers/infiniband/hw/mlx5/mr.c +@@ -1007,7 +1007,6 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ + return 0; + + mutex_lock(&dev->cache.rb_lock); +- dev->cache.disable = true; + for (node = rb_first(root); node; node = rb_next(node)) { + ent = rb_entry(node, struct mlx5_cache_ent, node); + spin_lock_irq(&ent->mkeys_queue.lock); +@@ -1810,10 +1809,6 @@ static int cache_ent_find_and_store(stru + } + + mutex_lock(&cache->rb_lock); +- if (cache->disable) { +- mutex_unlock(&cache->rb_lock); +- return 0; +- } + ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key); + if (ent) { + if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) { diff --git a/queue-6.1/series b/queue-6.1/series index a31cc50b60..3dfeae4dff 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -1,7 +1,4 @@ arm64-mte-do-not-allow-prot_mte-on-map_hugetlb-user-mappings.patch -md-use-separate-work_struct-for-md_start_sync.patch -md-factor-out-a-helper-from-mddev_put.patch -md-simplify-md_seq_ops.patch md-md-bitmap-replace-md_bitmap_status-with-a-new-hel.patch md-md-cluster-fix-spares-warnings-for-__le64.patch md-md-bitmap-add-sync_size-into-struct-md_bitmap_sta.patch @@ -185,3 +182,6 @@ rdma-mlx5-follow-rb_key.ats-when-creating-new-mkeys.patch rdma-mlx5-ensure-created-mkeys-always-have-a-populated-rb_key.patch rdma-mlx5-fix-counter-update-on-mr-cache-mkey-creation.patch rdma-mlx5-limit-usage-of-over-sized-mkeys-from-the-mr-cache.patch +rdma-mlx5-remove-not-used-cache-disable-flag.patch +rdma-mlx5-fix-mkey-cache-wq-flush.patch +rdma-mlx5-fix-mr-cache-temp-entries-cleanup.patch