From: Greg Kroah-Hartman
Date: Thu, 6 Mar 2025 14:14:10 +0000 (+0100)
Subject: 6.1-stable patches
X-Git-Tag: v6.6.81~3
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ee70eabb8516ab103457cf074cd7d534cd99c176;p=thirdparty%2Fkernel%2Fstable-queue.git

6.1-stable patches

added patches:
        rdma-mlx5-change-check-for-cacheable-mkeys.patch
        rdma-mlx5-check-reg_create-create-for-errors.patch
        rdma-mlx5-ensure-created-mkeys-always-have-a-populated-rb_key.patch
        rdma-mlx5-fix-assigning-access-flags-to-cache-mkeys.patch
        rdma-mlx5-fix-counter-update-on-mr-cache-mkey-creation.patch
        rdma-mlx5-fix-mkey-cache-possible-deadlock-on-cleanup.patch
        rdma-mlx5-fix-mr-cache-debugfs-error-in-ib-representors-mode.patch
        rdma-mlx5-follow-rb_key.ats-when-creating-new-mkeys.patch
        rdma-mlx5-limit-usage-of-over-sized-mkeys-from-the-mr-cache.patch
        rdma-mlx5-remove-extra-unlock-on-error-path.patch
        rdma-mlx5-uncacheable-mkey-has-neither-rb_key-or-cache_ent.patch
---

diff --git a/queue-6.1/rdma-mlx5-change-check-for-cacheable-mkeys.patch b/queue-6.1/rdma-mlx5-change-check-for-cacheable-mkeys.patch
new file mode 100644
index 0000000000..c7b2df5ba0
--- /dev/null
+++ b/queue-6.1/rdma-mlx5-change-check-for-cacheable-mkeys.patch
@@ -0,0 +1,96 @@
+From 8c1185fef68cc603b954fece2a434c9f851d6a86 Mon Sep 17 00:00:00 2001
+From: Or Har-Toov
+Date: Wed, 3 Apr 2024 13:36:00 +0300
+Subject: RDMA/mlx5: Change check for cacheable mkeys
+
+From: Or Har-Toov
+
+commit 8c1185fef68cc603b954fece2a434c9f851d6a86 upstream.
+
+umem can be NULL for user application mkeys in some cases. Therefore
+umem can't be used to check whether the mkey is cacheable, so the check
+is changed to use a flag that indicates it. Also make sure that all
+mkeys which are not returned to the cache will be destroyed.
+
+Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
+Signed-off-by: Or Har-Toov
+Link: https://lore.kernel.org/r/2690bc5c6896bcb937f89af16a1ff0343a7ab3d0.1712140377.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/infiniband/hw/mlx5/mlx5_ib.h |    1 +
+ drivers/infiniband/hw/mlx5/mr.c      |   32 ++++++++++++++++++++++----------
+ 2 files changed, 23 insertions(+), 10 deletions(-)
+
+--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+@@ -654,6 +654,7 @@ struct mlx5_ib_mkey {
+         /* Cacheable user Mkey must hold either a rb_key or a cache_ent. */
+         struct mlx5r_cache_rb_key rb_key;
+         struct mlx5_cache_ent *cache_ent;
++        u8 cacheable : 1;
+ };
+ 
+ #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -1155,6 +1155,7 @@ static struct mlx5_ib_mr *alloc_cacheabl
+         if (IS_ERR(mr))
+                 return mr;
+         mr->mmkey.rb_key = rb_key;
++        mr->mmkey.cacheable = true;
+         return mr;
+ }
+ 
+@@ -1165,6 +1166,7 @@ static struct mlx5_ib_mr *alloc_cacheabl
+         mr->ibmr.pd = pd;
+         mr->umem = umem;
+         mr->page_shift = order_base_2(page_size);
++        mr->mmkey.cacheable = true;
+         set_mr_fields(dev, mr, umem->length, access_flags, iova);
+ 
+         return mr;
+@@ -1830,6 +1832,23 @@ end:
+         return ret;
+ }
+ 
++static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
++{
++        struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
++        struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
++
++        if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr))
++                return 0;
++
++        if (ent) {
++                spin_lock_irq(&ent->mkeys_queue.lock);
++                ent->in_use--;
++                mr->mmkey.cache_ent = NULL;
++                spin_unlock_irq(&ent->mkeys_queue.lock);
++        }
++        return destroy_mkey(dev, mr);
++}
++
+ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+ {
+         struct mlx5_ib_mr *mr = to_mmr(ibmr);
+@@ -1875,16 +1894,9 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr,
+         }
+ 
+         /* Stop DMA */
+-        if (mr->umem && mlx5r_umr_can_load_pas(dev, mr->umem->length))
+-                if (mlx5r_umr_revoke_mr(mr) ||
+-                    cache_ent_find_and_store(dev, mr))
+-                        mr->mmkey.cache_ent = NULL;
+-
+-        if (!mr->mmkey.cache_ent) {
+-                rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
+-                if (rc)
+-                        return rc;
+-        }
++        rc = mlx5_revoke_mr(mr);
++        if (rc)
++                return rc;
+ 
+         if (mr->umem) {
+                 bool is_odp = is_odp_mr(mr);
diff --git a/queue-6.1/rdma-mlx5-check-reg_create-create-for-errors.patch b/queue-6.1/rdma-mlx5-check-reg_create-create-for-errors.patch
new file mode 100644
index 0000000000..666ce7fb24
--- /dev/null
+++ b/queue-6.1/rdma-mlx5-check-reg_create-create-for-errors.patch
@@ -0,0 +1,32 @@
+From 8e6e49ccf1a0f2b3257394dc8610bb6d48859d3f Mon Sep 17 00:00:00 2001
+From: Dan Carpenter
+Date: Mon, 6 Feb 2023 17:40:35 +0300
+Subject: RDMA/mlx5: Check reg_create() create for errors
+
+From: Dan Carpenter
+
+commit 8e6e49ccf1a0f2b3257394dc8610bb6d48859d3f upstream.
+
+The reg_create() can fail. Check for errors before dereferencing it.
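+
+For reference, the check relies on the kernel's ERR_PTR convention:
+reg_create() returns a valid pointer on success and an errno value
+encoded with ERR_PTR() on failure, so the caller must test the result
+with IS_ERR() before any dereference. A minimal sketch of the pattern
+(illustrative only, not part of the upstream change):
+
+        struct mlx5_ib_mr *mr;
+
+        mr = reg_create(pd, umem, iova, access_flags, page_size, false);
+        if (IS_ERR(mr))
+                return mr;          /* propagate ERR_PTR(-errno) upward */
+        mr->mmkey.rb_key = rb_key;  /* safe: mr is a valid pointer here */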
+
+Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
+Signed-off-by: Dan Carpenter
+Link: https://lore.kernel.org/r/Y+ERYy4wN0LsKsm+@kili
+Reviewed-by: Devesh Sharma
+Signed-off-by: Leon Romanovsky
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/infiniband/hw/mlx5/mr.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -1143,6 +1143,8 @@ static struct mlx5_ib_mr *alloc_cacheabl
+         mutex_lock(&dev->slow_path_mutex);
+         mr = reg_create(pd, umem, iova, access_flags, page_size, false);
+         mutex_unlock(&dev->slow_path_mutex);
++        if (IS_ERR(mr))
++                return mr;
+         mr->mmkey.rb_key = rb_key;
+         return mr;
+ }
diff --git a/queue-6.1/rdma-mlx5-ensure-created-mkeys-always-have-a-populated-rb_key.patch b/queue-6.1/rdma-mlx5-ensure-created-mkeys-always-have-a-populated-rb_key.patch
new file mode 100644
index 0000000000..add9d0b3c3
--- /dev/null
+++ b/queue-6.1/rdma-mlx5-ensure-created-mkeys-always-have-a-populated-rb_key.patch
@@ -0,0 +1,49 @@
+From 2e4c02fdecf2f6f55cefe48cb82d93fa4f8e2204 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe
+Date: Tue, 28 May 2024 15:52:54 +0300
+Subject: RDMA/mlx5: Ensure created mkeys always have a populated rb_key
+
+From: Jason Gunthorpe
+
+commit 2e4c02fdecf2f6f55cefe48cb82d93fa4f8e2204 upstream.
+
+cacheable and mmkey.rb_key together are used by mlx5_revoke_mr() to put the
+MR/mkey back into the cache. In all cases they should be set correctly.
+
+alloc_cacheable_mr() was setting cacheable but not filling rb_key,
+resulting in cache_ent_find_and_store() bucketing them all into a
+zero-length entry.
+
+implicit_get_child_mr()/mlx5_ib_alloc_implicit_mr() failed to set cacheable
+or rb_key at all, so the cache was not working for implicit ODP.
+
+Cc: stable@vger.kernel.org
+Fixes: 8c1185fef68c ("RDMA/mlx5: Change check for cacheable mkeys")
+Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
+Signed-off-by: Jason Gunthorpe
+Link: https://lore.kernel.org/r/7778c02dfa0999a30d6746c79a23dd7140a9c729.1716900410.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/infiniband/hw/mlx5/mr.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -715,6 +715,8 @@ static struct mlx5_ib_mr *_mlx5_mr_cache
+         }
+         mr->mmkey.cache_ent = ent;
+         mr->mmkey.type = MLX5_MKEY_MR;
++        mr->mmkey.rb_key = ent->rb_key;
++        mr->mmkey.cacheable = true;
+         init_waitqueue_head(&mr->mmkey.wait);
+         return mr;
+ }
+@@ -1165,7 +1167,6 @@ static struct mlx5_ib_mr *alloc_cacheabl
+         mr->ibmr.pd = pd;
+         mr->umem = umem;
+         mr->page_shift = order_base_2(page_size);
+-        mr->mmkey.cacheable = true;
+         set_mr_fields(dev, mr, umem->length, access_flags, iova);
+ 
+         return mr;
diff --git a/queue-6.1/rdma-mlx5-fix-assigning-access-flags-to-cache-mkeys.patch b/queue-6.1/rdma-mlx5-fix-assigning-access-flags-to-cache-mkeys.patch
new file mode 100644
index 0000000000..840c967cc2
--- /dev/null
+++ b/queue-6.1/rdma-mlx5-fix-assigning-access-flags-to-cache-mkeys.patch
@@ -0,0 +1,46 @@
+From 4f14c6c0213e1def48f0f887d35f44095416c67d Mon Sep 17 00:00:00 2001
+From: Michael Guralnik
+Date: Wed, 20 Sep 2023 13:01:54 +0300
+Subject: RDMA/mlx5: Fix assigning access flags to cache mkeys
+
+From: Michael Guralnik
+
+commit 4f14c6c0213e1def48f0f887d35f44095416c67d upstream.
+
+After the change to use dynamic cache structure, new cache entries
+can be added and the mkey allocation can no longer assume that all
+mkeys created for the cache have access_flags equal to zero.
+
+Example of a flow that exposes the issue:
+A user registers an MR with RO on an HCA that cannot UMR RO, and the mkey
+is created outside of the cache. When the user deregisters the MR, a new
+cache entry is created to store mkeys with RO.
+
+Later, the user registers 2 MRs with RO. The first MR is reused from the
+new cache entry. When we try to get the second mkey from the cache we see
+the entry is empty, so we go to the MR cache mkey allocation flow, which
+would have allocated an mkey with no access flags, resulting in the user
+getting an MR without RO.
+
+Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
+Reviewed-by: Edward Srouji
+Signed-off-by: Michael Guralnik
+Link: https://lore.kernel.org/r/8a802700b82def3ace3f77cd7a9ad9d734af87e7.1695203958.git.leonro@nvidia.com
+Signed-off-by: Leon Romanovsky
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/infiniband/hw/mlx5/mr.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -236,7 +236,8 @@ static int get_mkc_octo_size(unsigned in
+ 
+ static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
+ {
+-        set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
++        set_mkc_access_pd_addr_fields(mkc, ent->rb_key.access_flags, 0,
++                                      ent->dev->umrc.pd);
+         MLX5_SET(mkc, mkc, free, 1);
+         MLX5_SET(mkc, mkc, umr_en, 1);
+         MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3);
diff --git a/queue-6.1/rdma-mlx5-fix-counter-update-on-mr-cache-mkey-creation.patch b/queue-6.1/rdma-mlx5-fix-counter-update-on-mr-cache-mkey-creation.patch
new file mode 100644
index 0000000000..2d8a92613f
--- /dev/null
+++ b/queue-6.1/rdma-mlx5-fix-counter-update-on-mr-cache-mkey-creation.patch
@@ -0,0 +1,41 @@
+From 6f5cd6ac9a4201e4ba6f10b76a9da8044d6e38b0 Mon Sep 17 00:00:00 2001
+From: Michael Guralnik
+Date: Tue, 3 Sep 2024 14:24:48 +0300
+Subject: RDMA/mlx5: Fix counter update on MR cache mkey creation
+
+From: Michael Guralnik
+
+commit 6f5cd6ac9a4201e4ba6f10b76a9da8044d6e38b0 upstream.
+
+After an mkey is created, update the counter for pending mkeys before
+rescheduling the work that is filling the cache.
+
+Rescheduling the work with a full MR cache entry and a wrong 'pending'
+counter will cause us to miss disabling the fill_to_high_water flag.
+Thus leaving the cache full but with an indication that it still
+needs to be filled up to its full size (2 * limit).
+Next time an mkey will be taken from the cache, we'll unnecessarily
+continue the process of filling the cache to its full size.
+
+Fixes: 57e7071683ef ("RDMA/mlx5: Implement mkeys management via LIFO queue")
+Signed-off-by: Michael Guralnik
+Link: https://patch.msgid.link/0f44f462ba22e45f72cb3d0ec6a748634086b8d0.1725362530.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/infiniband/hw/mlx5/mr.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -208,9 +208,9 @@ static void create_mkey_callback(int sta
+ 
+         spin_lock_irqsave(&ent->mkeys_queue.lock, flags);
+         push_mkey_locked(ent, mkey_out->mkey);
++        ent->pending--;
+         /* If we are doing fill_to_high_water then keep going. */
+         queue_adjust_cache_locked(ent);
+-        ent->pending--;
+         spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags);
+         kfree(mkey_out);
+ }
diff --git a/queue-6.1/rdma-mlx5-fix-mkey-cache-possible-deadlock-on-cleanup.patch b/queue-6.1/rdma-mlx5-fix-mkey-cache-possible-deadlock-on-cleanup.patch
new file mode 100644
index 0000000000..731d75d835
--- /dev/null
+++ b/queue-6.1/rdma-mlx5-fix-mkey-cache-possible-deadlock-on-cleanup.patch
@@ -0,0 +1,151 @@
+From 374012b0045780b7ad498be62e85153009bb7fe9 Mon Sep 17 00:00:00 2001
+From: Shay Drory
+Date: Tue, 12 Sep 2023 13:07:45 +0300
+Subject: RDMA/mlx5: Fix mkey cache possible deadlock on cleanup
+
+From: Shay Drory
+
+commit 374012b0045780b7ad498be62e85153009bb7fe9 upstream.
+
+Fix the deadlock by refactoring the MR cache cleanup flow to flush the
+workqueue without holding the rb_lock.
+This adds a race between cache cleanup and creation of new entries which
+we solve by denying creation of new entries after cache cleanup started.
+
+Lockdep:
+WARNING: possible circular locking dependency detected
+ [ 2785.326074 ] 6.2.0-rc6_for_upstream_debug_2023_01_31_14_02 #1 Not tainted
+ [ 2785.339778 ] ------------------------------------------------------
+ [ 2785.340848 ] devlink/53872 is trying to acquire lock:
+ [ 2785.341701 ] ffff888124f8c0c8 ((work_completion)(&(&ent->dwork)->work)){+.+.}-{0:0}, at: __flush_work+0xc8/0x900
+ [ 2785.343403 ]
+ [ 2785.343403 ] but task is already holding lock:
+ [ 2785.344464 ] ffff88817e8f1260 (&dev->cache.rb_lock){+.+.}-{3:3}, at: mlx5_mkey_cache_cleanup+0x77/0x250 [mlx5_ib]
+ [ 2785.346273 ]
+ [ 2785.346273 ] which lock already depends on the new lock.
+ [ 2785.346273 ]
+ [ 2785.347720 ]
+ [ 2785.347720 ] the existing dependency chain (in reverse order) is:
+ [ 2785.349003 ]
+ [ 2785.349003 ] -> #1 (&dev->cache.rb_lock){+.+.}-{3:3}:
+ [ 2785.350160 ]        __mutex_lock+0x14c/0x15c0
+ [ 2785.350962 ]        delayed_cache_work_func+0x2d1/0x610 [mlx5_ib]
+ [ 2785.352044 ]        process_one_work+0x7c2/0x1310
+ [ 2785.352879 ]        worker_thread+0x59d/0xec0
+ [ 2785.353636 ]        kthread+0x28f/0x330
+ [ 2785.354370 ]        ret_from_fork+0x1f/0x30
+ [ 2785.355135 ]
+ [ 2785.355135 ] -> #0 ((work_completion)(&(&ent->dwork)->work)){+.+.}-{0:0}:
+ [ 2785.356515 ]        __lock_acquire+0x2d8a/0x5fe0
+ [ 2785.357349 ]        lock_acquire+0x1c1/0x540
+ [ 2785.358121 ]        __flush_work+0xe8/0x900
+ [ 2785.358852 ]        __cancel_work_timer+0x2c7/0x3f0
+ [ 2785.359711 ]        mlx5_mkey_cache_cleanup+0xfb/0x250 [mlx5_ib]
+ [ 2785.360781 ]        mlx5_ib_stage_pre_ib_reg_umr_cleanup+0x16/0x30 [mlx5_ib]
+ [ 2785.361969 ]        __mlx5_ib_remove+0x68/0x120 [mlx5_ib]
+ [ 2785.362960 ]        mlx5r_remove+0x63/0x80 [mlx5_ib]
+ [ 2785.363870 ]        auxiliary_bus_remove+0x52/0x70
+ [ 2785.364715 ]        device_release_driver_internal+0x3c1/0x600
+ [ 2785.365695 ]        bus_remove_device+0x2a5/0x560
+ [ 2785.366525 ]        device_del+0x492/0xb80
+ [ 2785.367276 ]        mlx5_detach_device+0x1a9/0x360 [mlx5_core]
+ [ 2785.368615 ]        mlx5_unload_one_devl_locked+0x5a/0x110 [mlx5_core]
+ [ 2785.369934 ]        mlx5_devlink_reload_down+0x292/0x580 [mlx5_core]
+ [ 2785.371292 ]        devlink_reload+0x439/0x590
+ [ 2785.372075 ]        devlink_nl_cmd_reload+0xaef/0xff0
+ [ 2785.372973 ]        genl_family_rcv_msg_doit.isra.0+0x1bd/0x290
+ [ 2785.374011 ]        genl_rcv_msg+0x3ca/0x6c0
+ [ 2785.374798 ]        netlink_rcv_skb+0x12c/0x360
+ [ 2785.375612 ]        genl_rcv+0x24/0x40
+ [ 2785.376295 ]        netlink_unicast+0x438/0x710
+ [ 2785.377121 ]        netlink_sendmsg+0x7a1/0xca0
+ [ 2785.377926 ]        sock_sendmsg+0xc5/0x190
+ [ 2785.378668 ]        __sys_sendto+0x1bc/0x290
+ [ 2785.379440 ]        __x64_sys_sendto+0xdc/0x1b0
+ [ 2785.380255 ]        do_syscall_64+0x3d/0x90
+ [ 2785.381031 ]        entry_SYSCALL_64_after_hwframe+0x46/0xb0
+ [ 2785.381967 ]
+ [ 2785.381967 ] other info that might help us debug this:
+ [ 2785.381967 ]
+ [ 2785.383448 ] Possible unsafe locking scenario:
+ [ 2785.383448 ]
+ [ 2785.384544 ]        CPU0                    CPU1
+ [ 2785.385383 ]        ----                    ----
+ [ 2785.386193 ]   lock(&dev->cache.rb_lock);
+ [ 2785.386940 ]                               lock((work_completion)(&(&ent->dwork)->work));
+ [ 2785.388327 ]                               lock(&dev->cache.rb_lock);
+ [ 2785.389425 ]   lock((work_completion)(&(&ent->dwork)->work));
+ [ 2785.390414 ]
+ [ 2785.390414 ]  *** DEADLOCK ***
+ [ 2785.390414 ]
+ [ 2785.391579 ] 6 locks held by devlink/53872:
+ [ 2785.392341 ]  #0: ffffffff84c17a50 (cb_lock){++++}-{3:3}, at: genl_rcv+0x15/0x40
+ [ 2785.393630 ]  #1: ffff888142280218 (&devlink->lock_key){+.+.}-{3:3}, at: devlink_get_from_attrs_lock+0x12d/0x2d0
+ [ 2785.395324 ]  #2: ffff8881422d3c38 (&dev->lock_key){+.+.}-{3:3}, at: mlx5_unload_one_devl_locked+0x4a/0x110 [mlx5_core]
+ [ 2785.397322 ]  #3: ffffffffa0e59068 (mlx5_intf_mutex){+.+.}-{3:3}, at: mlx5_detach_device+0x60/0x360 [mlx5_core]
+ [ 2785.399231 ]  #4: ffff88810e3cb0e8 (&dev->mutex){....}-{3:3}, at: device_release_driver_internal+0x8d/0x600
+ [ 2785.400864 ]  #5: ffff88817e8f1260 (&dev->cache.rb_lock){+.+.}-{3:3}, at: mlx5_mkey_cache_cleanup+0x77/0x250 [mlx5_ib]
+
+Fixes: b95845178328 ("RDMA/mlx5: Change the cache structure to an RB-tree")
+Signed-off-by: Shay Drory
+Signed-off-by: Michael Guralnik
+Signed-off-by: Leon Romanovsky
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/infiniband/hw/mlx5/mlx5_ib.h |    1 +
+ drivers/infiniband/hw/mlx5/mr.c      |   16 ++++++++++++++--
+ 2 files changed, 15 insertions(+), 2 deletions(-)
+
+--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+@@ -821,6 +821,7 @@ struct mlx5_mkey_cache {
+         struct dentry *fs_root;
+         unsigned long last_add;
+         struct delayed_work remove_ent_dwork;
++        u8 disable: 1;
+ };
+ 
+ struct mlx5_ib_port_resources {
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -994,19 +994,27 @@ int mlx5_mkey_cache_cleanup(struct mlx5_
+         if (!dev->cache.wq)
+                 return 0;
+ 
+-        cancel_delayed_work_sync(&dev->cache.remove_ent_dwork);
+         mutex_lock(&dev->cache.rb_lock);
++        dev->cache.disable = true;
+         for (node = rb_first(root); node; node = rb_next(node)) {
+                 ent = rb_entry(node, struct mlx5_cache_ent, node);
+                 spin_lock_irq(&ent->mkeys_queue.lock);
+                 ent->disabled = true;
+                 spin_unlock_irq(&ent->mkeys_queue.lock);
+-                cancel_delayed_work_sync(&ent->dwork);
+         }
++        mutex_unlock(&dev->cache.rb_lock);
++
++        /*
++         * After all entries are disabled and will not reschedule on WQ,
++         * flush it and all async commands.
++         */
++        flush_workqueue(dev->cache.wq);
+ 
+         mlx5_mkey_cache_debugfs_cleanup(dev);
+         mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
+ 
++        /* At this point all entries are disabled and have no concurrent work. */
++        mutex_lock(&dev->cache.rb_lock);
+         node = rb_first(root);
+         while (node) {
+                 ent = rb_entry(node, struct mlx5_cache_ent, node);
+@@ -1789,6 +1797,10 @@ static int cache_ent_find_and_store(stru
+         }
+ 
+         mutex_lock(&cache->rb_lock);
++        if (cache->disable) {
++                mutex_unlock(&cache->rb_lock);
++                return 0;
++        }
+         ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key);
+         if (ent) {
+                 if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) {
diff --git a/queue-6.1/rdma-mlx5-fix-mr-cache-debugfs-error-in-ib-representors-mode.patch b/queue-6.1/rdma-mlx5-fix-mr-cache-debugfs-error-in-ib-representors-mode.patch
new file mode 100644
index 0000000000..31fbb7172e
--- /dev/null
+++ b/queue-6.1/rdma-mlx5-fix-mr-cache-debugfs-error-in-ib-representors-mode.patch
@@ -0,0 +1,52 @@
+From 828cf5936bea2438c21a3a6c303b34a2a1f6c3c2 Mon Sep 17 00:00:00 2001
+From: Leon Romanovsky
+Date: Thu, 2 Feb 2023 11:03:06 +0200
+Subject: RDMA/mlx5: Fix MR cache debugfs error in IB representors mode
+
+From: Leon Romanovsky
+
+commit 828cf5936bea2438c21a3a6c303b34a2a1f6c3c2 upstream.
+
+Block MR cache debugfs creation for the IB representor flow, as the MR cache
+shouldn't be used at all in that mode. As part of this change, add missing
+debugfs cleanup in the error path too.
+
+This change fixes the following debugfs errors:
+
+ bond0: (slave enp8s0f1): Enslaving as a backup interface with an up link
+ mlx5_core 0000:08:00.0: lag map: port 1:1 port 2:1
+ mlx5_core 0000:08:00.0: shared_fdb:1 mode:queue_affinity
+ mlx5_core 0000:08:00.0: Operation mode is single FDB
+ debugfs: Directory '2' with parent '/' already present!
+...
+ debugfs: Directory '22' with parent '/' already present!
+
+Fixes: 73d09b2fe833 ("RDMA/mlx5: Introduce mlx5r_cache_rb_key")
+Signed-off-by: Michael Guralnik
+Link: https://lore.kernel.org/r/482a78c54acbcfa1742a0e06a452546428900ffa.1675328463.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/infiniband/hw/mlx5/mr.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -789,6 +789,9 @@ static void mlx5_mkey_cache_debugfs_add_
+         int order = order_base_2(ent->rb_key.ndescs);
+         struct dentry *dir;
+ 
++        if (!mlx5_debugfs_root || dev->is_rep)
++                return;
++
+         if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+                 order = MLX5_IMR_KSM_CACHE_ENTRY + 2;
+ 
+@@ -977,6 +980,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_
+ 
+ err:
+         mutex_unlock(&cache->rb_lock);
++        mlx5_mkey_cache_debugfs_cleanup(dev);
+         mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
+         return ret;
+ }
diff --git a/queue-6.1/rdma-mlx5-follow-rb_key.ats-when-creating-new-mkeys.patch b/queue-6.1/rdma-mlx5-follow-rb_key.ats-when-creating-new-mkeys.patch
new file mode 100644
index 0000000000..c513393fce
--- /dev/null
+++ b/queue-6.1/rdma-mlx5-follow-rb_key.ats-when-creating-new-mkeys.patch
@@ -0,0 +1,37 @@
+From f637040c3339a2ed8c12d65ad03f9552386e2fe7 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe
+Date: Tue, 28 May 2024 15:52:53 +0300
+Subject: RDMA/mlx5: Follow rb_key.ats when creating new mkeys
+
+From: Jason Gunthorpe
+
+commit f637040c3339a2ed8c12d65ad03f9552386e2fe7 upstream.
+
+When a cache ent already exists but doesn't have any mkeys in it, the cache
+will automatically create a new one based on the specification in the
+ent->rb_key.
+
+ent->ats was missed when creating the new key and so ma_translation_mode
+was not being set even though the ent requires it.
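+
+For context, set_cache_mkc() must mirror every attribute of ent->rb_key
+into the mkey context (mkc) when it rebuilds an mkey for the entry. A
+rough sketch of the mapping, as an illustration rather than the literal
+upstream code:
+
+        /* rb_key attribute -> mkc field programmed in set_cache_mkc() */
+        access_flags -> set_mkc_access_pd_addr_fields(mkc, access_flags, ...)
+        access_mode  -> MLX5_SET(mkc, mkc, access_mode_1_0 / access_mode_4_2, ...)
+        ats          -> MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats)
+        ndescs       -> MLX5_SET(mkc, mkc, translations_octword_size, ...)
+
+The ats mapping is the one added below; without it, an ats-enabled entry
+produced replacement mkeys with ma_translation_mode left clear.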
+
+Cc: stable@vger.kernel.org
+Fixes: 73d09b2fe833 ("RDMA/mlx5: Introduce mlx5r_cache_rb_key")
+Signed-off-by: Jason Gunthorpe
+Reviewed-by: Michael Guralnik
+Link: https://lore.kernel.org/r/7c5613458ecb89fbe5606b7aa4c8d990bdea5b9a.1716900410.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/infiniband/hw/mlx5/mr.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -243,6 +243,7 @@ static void set_cache_mkc(struct mlx5_ca
+         MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3);
+         MLX5_SET(mkc, mkc, access_mode_4_2,
+                  (ent->rb_key.access_mode >> 2) & 0x7);
++        MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats);
+ 
+         MLX5_SET(mkc, mkc, translations_octword_size,
+                  get_mkc_octo_size(ent->rb_key.access_mode,
diff --git a/queue-6.1/rdma-mlx5-limit-usage-of-over-sized-mkeys-from-the-mr-cache.patch b/queue-6.1/rdma-mlx5-limit-usage-of-over-sized-mkeys-from-the-mr-cache.patch
new file mode 100644
index 0000000000..c68b5d0f1c
--- /dev/null
+++ b/queue-6.1/rdma-mlx5-limit-usage-of-over-sized-mkeys-from-the-mr-cache.patch
@@ -0,0 +1,92 @@
+From ee6d57a2e13d11ce9050cfc3e3b69ef707a44a63 Mon Sep 17 00:00:00 2001
+From: Michael Guralnik
+Date: Tue, 3 Sep 2024 14:24:49 +0300
+Subject: RDMA/mlx5: Limit usage of over-sized mkeys from the MR cache
+
+From: Michael Guralnik
+
+commit ee6d57a2e13d11ce9050cfc3e3b69ef707a44a63 upstream.
+
+When searching the MR cache for suitable cache entries, don't use mkeys
+larger than twice the size required for the MR.
+This should ensure the usage of mkeys closer to the minimal required size
+and reduce memory waste.
+
+On driver init we create entries for mkeys with clear attributes and
+powers of 2 sizes from 4 to the max supported size.
+This solves the issue for anyone using mkeys that fit these
+requirements.
+
+In the use case where an MR is registered with different attributes,
+like an access flag we can't UMR, we'll create a new cache entry to store
+it upon dereg.
+Without this fix, any later registration with the same attributes and
+smaller size will use the newly created cache entry and its mkeys,
+disregarding the memory waste of using mkeys larger than required.
+
+For example, one worst-case scenario can be when registering and
+deregistering a 1GB mkey with ATS enabled, which will cause the creation
+of a new cache entry to hold that type of mkey. A user registering a 4k
+MR with ATS will end up using the new cache entry and an mkey that can
+support a 1GB MR, wasting roughly 250,000x more memory than needed in the HW.
+
+Additionally, allow all small registrations to use the smallest size
+cache entry that is initialized on driver load even if its size is
+larger than twice the required size.
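+
+A worked example of the limit introduced below, where the lookup allows
+entries up to ndescs_limit = max(ndescs * 2, 4): a request for 5
+descriptors may use an entry of up to max(5 * 2, 4) = 10 descriptors, so
+an 8-descriptor entry qualifies while a 16-descriptor entry no longer
+does; a request for 1 descriptor may still use the smallest persistent
+entry (4 descriptors), since max(1 * 2, 4) = 4.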
+
+Fixes: 73d09b2fe833 ("RDMA/mlx5: Introduce mlx5r_cache_rb_key")
+Signed-off-by: Michael Guralnik
+Link: https://patch.msgid.link/8ba3a6e3748aace2026de8b83da03aba084f78f4.1725362530.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/infiniband/hw/mlx5/mr.c |   14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -48,6 +48,7 @@ enum {
+         MAX_PENDING_REG_MR = 8,
+ };
+ 
++#define MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS 4
+ #define MLX5_UMR_ALIGN 2048
+ 
+ static void
+@@ -656,6 +657,7 @@ mkey_cache_ent_from_rb_key(struct mlx5_i
+ {
+         struct rb_node *node = dev->cache.rb_root.rb_node;
+         struct mlx5_cache_ent *cur, *smallest = NULL;
++        u64 ndescs_limit;
+         int cmp;
+ 
+         /*
+@@ -674,10 +676,18 @@ mkey_cache_ent_from_rb_key(struct mlx5_i
+                         return cur;
+         }
+ 
++        /*
++         * Limit the usage of mkeys larger than twice the required size while
++         * also allowing the usage of smallest cache entry for small MRs.
++         */
++        ndescs_limit = max_t(u64, rb_key.ndescs * 2,
++                             MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS);
++
+         return (smallest &&
+                 smallest->rb_key.access_mode == rb_key.access_mode &&
+                 smallest->rb_key.access_flags == rb_key.access_flags &&
+-                smallest->rb_key.ats == rb_key.ats) ?
++                smallest->rb_key.ats == rb_key.ats &&
++                smallest->rb_key.ndescs <= ndescs_limit) ?
+                        smallest :
+                        NULL;
+ }
+@@ -958,7 +968,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_
+         mlx5_mkey_cache_debugfs_init(dev);
+         mutex_lock(&cache->rb_lock);
+         for (i = 0; i <= mkey_cache_max_order(dev); i++) {
+-                rb_key.ndescs = 1 << (i + 2);
++                rb_key.ndescs = MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS << i;
+                 ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
+                 if (IS_ERR(ent)) {
+                         ret = PTR_ERR(ent);
diff --git a/queue-6.1/rdma-mlx5-remove-extra-unlock-on-error-path.patch b/queue-6.1/rdma-mlx5-remove-extra-unlock-on-error-path.patch
new file mode 100644
index 0000000000..4961a41598
--- /dev/null
+++ b/queue-6.1/rdma-mlx5-remove-extra-unlock-on-error-path.patch
@@ -0,0 +1,38 @@
+From c1eb2512596fb3542357bb6c34c286f5e0374538 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe
+Date: Tue, 28 May 2024 15:52:52 +0300
+Subject: RDMA/mlx5: Remove extra unlock on error path
+
+From: Jason Gunthorpe
+
+commit c1eb2512596fb3542357bb6c34c286f5e0374538 upstream.
+
+The below commit lifted the locking out of this function but left this
+error path unlock behind, resulting in unbalanced locking. Remove the
+missed unlock too.
+
+Cc: stable@vger.kernel.org
+Fixes: 627122280c87 ("RDMA/mlx5: Add work to remove temporary entries from the cache")
+Signed-off-by: Jason Gunthorpe
+Reviewed-by: Michael Guralnik
+Link: https://lore.kernel.org/r/78090c210c750f47219b95248f9f782f34548bb1.1716900410.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/infiniband/hw/mlx5/mr.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -638,10 +638,8 @@ static int mlx5_cache_ent_insert(struct
+                         new = &((*new)->rb_left);
+                 if (cmp < 0)
+                         new = &((*new)->rb_right);
+-                if (cmp == 0) {
+-                        mutex_unlock(&cache->rb_lock);
++                if (cmp == 0)
+                         return -EEXIST;
+-                }
+         }
+ 
+         /* Add new node and rebalance tree. */
diff --git a/queue-6.1/rdma-mlx5-uncacheable-mkey-has-neither-rb_key-or-cache_ent.patch b/queue-6.1/rdma-mlx5-uncacheable-mkey-has-neither-rb_key-or-cache_ent.patch
new file mode 100644
index 0000000000..4778718718
--- /dev/null
+++ b/queue-6.1/rdma-mlx5-uncacheable-mkey-has-neither-rb_key-or-cache_ent.patch
@@ -0,0 +1,33 @@
+From 0611a8e8b475fc5230b9a24d29c8397aaab20b63 Mon Sep 17 00:00:00 2001
+From: Or Har-Toov
+Date: Wed, 3 Apr 2024 13:35:59 +0300
+Subject: RDMA/mlx5: Uncacheable mkey has neither rb_key or cache_ent
+
+From: Or Har-Toov
+
+commit 0611a8e8b475fc5230b9a24d29c8397aaab20b63 upstream.
+
+As some mkeys can't be modified with UMR due to UMR limitations, like
+the size of the translation that can be updated, not all user mkeys can
+be cached.
+
+Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
+Signed-off-by: Or Har-Toov
+Link: https://lore.kernel.org/r/f2742dd934ed73b2d32c66afb8e91b823063880c.1712140377.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/infiniband/hw/mlx5/mlx5_ib.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+@@ -651,7 +651,7 @@ struct mlx5_ib_mkey {
+         unsigned int ndescs;
+         struct wait_queue_head wait;
+         refcount_t usecount;
+-        /* User Mkey must hold either a rb_key or a cache_ent. */
++        /* Cacheable user Mkey must hold either a rb_key or a cache_ent. */
+         struct mlx5r_cache_rb_key rb_key;
+         struct mlx5_cache_ent *cache_ent;
+ };
diff --git a/queue-6.1/series b/queue-6.1/series
index 11d814cbe0..a31cc50b60 100644
--- a/queue-6.1/series
+++ b/queue-6.1/series
@@ -174,3 +174,14 @@ mm-memory-use-exception-ip-to-search-exception-tables.patch
 squashfs-check-the-inode-number-is-not-the-invalid-value-of-zero.patch
 pfifo_tail_enqueue-drop-new-packet-when-sch-limit-0.patch
 media-mtk-vcodec-potential-null-pointer-deference-in-scp.patch
+rdma-mlx5-fix-mr-cache-debugfs-error-in-ib-representors-mode.patch
+rdma-mlx5-check-reg_create-create-for-errors.patch
+rdma-mlx5-fix-mkey-cache-possible-deadlock-on-cleanup.patch
+rdma-mlx5-fix-assigning-access-flags-to-cache-mkeys.patch
+rdma-mlx5-uncacheable-mkey-has-neither-rb_key-or-cache_ent.patch
+rdma-mlx5-change-check-for-cacheable-mkeys.patch
+rdma-mlx5-remove-extra-unlock-on-error-path.patch
+rdma-mlx5-follow-rb_key.ats-when-creating-new-mkeys.patch
+rdma-mlx5-ensure-created-mkeys-always-have-a-populated-rb_key.patch
+rdma-mlx5-fix-counter-update-on-mr-cache-mkey-creation.patch
+rdma-mlx5-limit-usage-of-over-sized-mkeys-from-the-mr-cache.patch