--- /dev/null
+From 8c1185fef68cc603b954fece2a434c9f851d6a86 Mon Sep 17 00:00:00 2001
+From: Or Har-Toov <ohartoov@nvidia.com>
+Date: Wed, 3 Apr 2024 13:36:00 +0300
+Subject: RDMA/mlx5: Change check for cacheable mkeys
+
+From: Or Har-Toov <ohartoov@nvidia.com>
+
+commit 8c1185fef68cc603b954fece2a434c9f851d6a86 upstream.
+
+umem can be NULL for user application mkeys in some cases, so it can't
+be used to check whether an mkey is cacheable. Check a flag that
+explicitly indicates cacheability instead. Also make sure that all
+mkeys which are not returned to the cache are destroyed.
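+
+For reference, a condensed before/after of the dereg-path decision
+(taken from the hunks below; not a complete function):
+
+    /* before: cacheability inferred from the umem */
+    if (mr->umem && mlx5r_umr_can_load_pas(dev, mr->umem->length))
+            ...
+
+    /* after: rely on the flag set at mkey creation time */
+    if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) &&
+        !cache_ent_find_and_store(dev, mr))
+            return 0;       /* mkey parked back in the cache */
+    ...
+    return destroy_mkey(dev, mr);   /* otherwise always destroyed */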
+
+Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
+Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
+Link: https://lore.kernel.org/r/2690bc5c6896bcb937f89af16a1ff0343a7ab3d0.1712140377.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 +
+ drivers/infiniband/hw/mlx5/mr.c | 32 ++++++++++++++++++++++----------
+ 2 files changed, 23 insertions(+), 10 deletions(-)
+
+--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+@@ -654,6 +654,7 @@ struct mlx5_ib_mkey {
+ /* Cacheable user Mkey must hold either a rb_key or a cache_ent. */
+ struct mlx5r_cache_rb_key rb_key;
+ struct mlx5_cache_ent *cache_ent;
++ u8 cacheable : 1;
+ };
+
+ #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -1155,6 +1155,7 @@ static struct mlx5_ib_mr *alloc_cacheabl
+ if (IS_ERR(mr))
+ return mr;
+ mr->mmkey.rb_key = rb_key;
++ mr->mmkey.cacheable = true;
+ return mr;
+ }
+
+@@ -1165,6 +1166,7 @@ static struct mlx5_ib_mr *alloc_cacheabl
+ mr->ibmr.pd = pd;
+ mr->umem = umem;
+ mr->page_shift = order_base_2(page_size);
++ mr->mmkey.cacheable = true;
+ set_mr_fields(dev, mr, umem->length, access_flags, iova);
+
+ return mr;
+@@ -1830,6 +1832,23 @@ end:
+ return ret;
+ }
+
++static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
++{
++ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
++ struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
++
++ if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr))
++ return 0;
++
++ if (ent) {
++ spin_lock_irq(&ent->mkeys_queue.lock);
++ ent->in_use--;
++ mr->mmkey.cache_ent = NULL;
++ spin_unlock_irq(&ent->mkeys_queue.lock);
++ }
++ return destroy_mkey(dev, mr);
++}
++
+ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+ {
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+@@ -1875,16 +1894,9 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr,
+ }
+
+ /* Stop DMA */
+- if (mr->umem && mlx5r_umr_can_load_pas(dev, mr->umem->length))
+- if (mlx5r_umr_revoke_mr(mr) ||
+- cache_ent_find_and_store(dev, mr))
+- mr->mmkey.cache_ent = NULL;
+-
+- if (!mr->mmkey.cache_ent) {
+- rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
+- if (rc)
+- return rc;
+- }
++ rc = mlx5_revoke_mr(mr);
++ if (rc)
++ return rc;
+
+ if (mr->umem) {
+ bool is_odp = is_odp_mr(mr);
--- /dev/null
+From 8e6e49ccf1a0f2b3257394dc8610bb6d48859d3f Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <error27@gmail.com>
+Date: Mon, 6 Feb 2023 17:40:35 +0300
+Subject: RDMA/mlx5: Check reg_create() create for errors
+
+From: Dan Carpenter <error27@gmail.com>
+
+commit 8e6e49ccf1a0f2b3257394dc8610bb6d48859d3f upstream.
+
+The reg_create() can fail. Check for errors before dereferencing it.
+
+Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
+Signed-off-by: Dan Carpenter <error27@gmail.com>
+Link: https://lore.kernel.org/r/Y+ERYy4wN0LsKsm+@kili
+Reviewed-by: Devesh Sharma <devesh.s.sharma@oracle.com>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/mr.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -1143,6 +1143,8 @@ static struct mlx5_ib_mr *alloc_cacheabl
+ mutex_lock(&dev->slow_path_mutex);
+ mr = reg_create(pd, umem, iova, access_flags, page_size, false);
+ mutex_unlock(&dev->slow_path_mutex);
++ if (IS_ERR(mr))
++ return mr;
+ mr->mmkey.rb_key = rb_key;
+ return mr;
+ }
--- /dev/null
+From 2e4c02fdecf2f6f55cefe48cb82d93fa4f8e2204 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Tue, 28 May 2024 15:52:54 +0300
+Subject: RDMA/mlx5: Ensure created mkeys always have a populated rb_key
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit 2e4c02fdecf2f6f55cefe48cb82d93fa4f8e2204 upstream.
+
+cacheable and mmkey.rb_key together are used by mlx5_revoke_mr() to put
+the MR/mkey back into the cache. In all cases they should be set
+correctly.
+
+alloc_cacheable_mr() was setting cacheable but not filling rb_key,
+resulting in cache_ent_find_and_store() bucketing them all into a
+zero-length entry.
+
+implicit_get_child_mr()/mlx5_ib_alloc_implicit_mr() failed to set
+cacheable or rb_key at all, so the cache was not working for implicit
+ODP.
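+
+For context, the dereg path added by commit 8c1185fef68c only parks an
+mkey back in the cache when both pieces are valid (condensed, not
+verbatim):
+
+    if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) &&
+        !cache_ent_find_and_store(dev, mr))   /* looks up mmkey.rb_key */
+            return 0;
+
+so an unset rb_key buckets every MR into the zero-length entry, and an
+unset cacheable flag bypasses the cache entirely.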
+
+Cc: stable@vger.kernel.org
+Fixes: 8c1185fef68c ("RDMA/mlx5: Change check for cacheable mkeys")
+Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Link: https://lore.kernel.org/r/7778c02dfa0999a30d6746c79a23dd7140a9c729.1716900410.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/mr.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -715,6 +715,8 @@ static struct mlx5_ib_mr *_mlx5_mr_cache
+ }
+ mr->mmkey.cache_ent = ent;
+ mr->mmkey.type = MLX5_MKEY_MR;
++ mr->mmkey.rb_key = ent->rb_key;
++ mr->mmkey.cacheable = true;
+ init_waitqueue_head(&mr->mmkey.wait);
+ return mr;
+ }
+@@ -1165,7 +1167,6 @@ static struct mlx5_ib_mr *alloc_cacheabl
+ mr->ibmr.pd = pd;
+ mr->umem = umem;
+ mr->page_shift = order_base_2(page_size);
+- mr->mmkey.cacheable = true;
+ set_mr_fields(dev, mr, umem->length, access_flags, iova);
+
+ return mr;
--- /dev/null
+From 4f14c6c0213e1def48f0f887d35f44095416c67d Mon Sep 17 00:00:00 2001
+From: Michael Guralnik <michaelgur@nvidia.com>
+Date: Wed, 20 Sep 2023 13:01:54 +0300
+Subject: RDMA/mlx5: Fix assigning access flags to cache mkeys
+
+From: Michael Guralnik <michaelgur@nvidia.com>
+
+commit 4f14c6c0213e1def48f0f887d35f44095416c67d upstream.
+
+After the change to use dynamic cache structure, new cache entries
+can be added and the mkey allocation can no longer assume that all
+mkeys created for the cache have access_flags equal to zero.
+
+Example of a flow that exposes the issue:
+A user registers an MR with RO on an HCA that cannot UMR RO, so the mkey
+is created outside of the cache. When the user deregisters the MR, a new
+cache entry is created to store mkeys with RO.
+
+Later, the user registers 2 MRs with RO. The first MR reuses the mkey
+from the new cache entry. When we try to get the second mkey from the
+cache, we see the entry is empty, so we go to the MR cache mkey
+allocation flow, which would have allocated an mkey with no access
+flags, resulting in the user getting an MR without RO.
+
+Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
+Reviewed-by: Edward Srouji <edwards@nvidia.com>
+Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
+Link: https://lore.kernel.org/r/8a802700b82def3ace3f77cd7a9ad9d734af87e7.1695203958.git.leonro@nvidia.com
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/mr.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -236,7 +236,8 @@ static int get_mkc_octo_size(unsigned in
+
+ static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
+ {
+- set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
++ set_mkc_access_pd_addr_fields(mkc, ent->rb_key.access_flags, 0,
++ ent->dev->umrc.pd);
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+ MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3);
--- /dev/null
+From 6f5cd6ac9a4201e4ba6f10b76a9da8044d6e38b0 Mon Sep 17 00:00:00 2001
+From: Michael Guralnik <michaelgur@nvidia.com>
+Date: Tue, 3 Sep 2024 14:24:48 +0300
+Subject: RDMA/mlx5: Fix counter update on MR cache mkey creation
+
+From: Michael Guralnik <michaelgur@nvidia.com>
+
+commit 6f5cd6ac9a4201e4ba6f10b76a9da8044d6e38b0 upstream.
+
+After an mkey is created, update the counter for pending mkeys before
+rescheduling the work that is filling the cache.
+
+Rescheduling the work with a full MR cache entry and a wrong 'pending'
+counter will cause us to miss disabling the fill_to_high_water flag,
+leaving the cache full but with an indication that it still needs to be
+filled up to its full size (2 * limit).
+The next time an mkey is taken from the cache, we'll unnecessarily
+continue the process of filling the cache to its full size.
+
+Fixes: 57e7071683ef ("RDMA/mlx5: Implement mkeys management via LIFO queue")
+Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
+Link: https://patch.msgid.link/0f44f462ba22e45f72cb3d0ec6a748634086b8d0.1725362530.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/mr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -208,9 +208,9 @@ static void create_mkey_callback(int sta
+
+ spin_lock_irqsave(&ent->mkeys_queue.lock, flags);
+ push_mkey_locked(ent, mkey_out->mkey);
++ ent->pending--;
+ /* If we are doing fill_to_high_water then keep going. */
+ queue_adjust_cache_locked(ent);
+- ent->pending--;
+ spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags);
+ kfree(mkey_out);
+ }
--- /dev/null
+From 374012b0045780b7ad498be62e85153009bb7fe9 Mon Sep 17 00:00:00 2001
+From: Shay Drory <shayd@nvidia.com>
+Date: Tue, 12 Sep 2023 13:07:45 +0300
+Subject: RDMA/mlx5: Fix mkey cache possible deadlock on cleanup
+
+From: Shay Drory <shayd@nvidia.com>
+
+commit 374012b0045780b7ad498be62e85153009bb7fe9 upstream.
+
+Fix the deadlock by refactoring the MR cache cleanup flow to flush the
+workqueue without holding the rb_lock.
+This adds a race between cache cleanup and creation of new entries, which
+we solve by denying creation of new entries after cache cleanup has
+started.
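+
+The resulting cleanup ordering, condensed from the hunks below, is:
+
+    mutex_lock(&dev->cache.rb_lock);
+    dev->cache.disable = true;        /* no new entries from now on */
+    /* mark every existing entry as disabled */
+    mutex_unlock(&dev->cache.rb_lock);
+
+    flush_workqueue(dev->cache.wq);   /* rb_lock is NOT held here */
+
+    mutex_lock(&dev->cache.rb_lock);
+    /* destroy the entries; no concurrent work can run anymore */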
+
+Lockdep:
+WARNING: possible circular locking dependency detected
+ [ 2785.326074 ] 6.2.0-rc6_for_upstream_debug_2023_01_31_14_02 #1 Not tainted
+ [ 2785.339778 ] ------------------------------------------------------
+ [ 2785.340848 ] devlink/53872 is trying to acquire lock:
+ [ 2785.341701 ] ffff888124f8c0c8 ((work_completion)(&(&ent->dwork)->work)){+.+.}-{0:0}, at: __flush_work+0xc8/0x900
+ [ 2785.343403 ]
+ [ 2785.343403 ] but task is already holding lock:
+ [ 2785.344464 ] ffff88817e8f1260 (&dev->cache.rb_lock){+.+.}-{3:3}, at: mlx5_mkey_cache_cleanup+0x77/0x250 [mlx5_ib]
+ [ 2785.346273 ]
+ [ 2785.346273 ] which lock already depends on the new lock.
+ [ 2785.346273 ]
+ [ 2785.347720 ]
+ [ 2785.347720 ] the existing dependency chain (in reverse order) is:
+ [ 2785.349003 ]
+ [ 2785.349003 ] -> #1 (&dev->cache.rb_lock){+.+.}-{3:3}:
+ [ 2785.350160 ] __mutex_lock+0x14c/0x15c0
+ [ 2785.350962 ] delayed_cache_work_func+0x2d1/0x610 [mlx5_ib]
+ [ 2785.352044 ] process_one_work+0x7c2/0x1310
+ [ 2785.352879 ] worker_thread+0x59d/0xec0
+ [ 2785.353636 ] kthread+0x28f/0x330
+ [ 2785.354370 ] ret_from_fork+0x1f/0x30
+ [ 2785.355135 ]
+ [ 2785.355135 ] -> #0 ((work_completion)(&(&ent->dwork)->work)){+.+.}-{0:0}:
+ [ 2785.356515 ] __lock_acquire+0x2d8a/0x5fe0
+ [ 2785.357349 ] lock_acquire+0x1c1/0x540
+ [ 2785.358121 ] __flush_work+0xe8/0x900
+ [ 2785.358852 ] __cancel_work_timer+0x2c7/0x3f0
+ [ 2785.359711 ] mlx5_mkey_cache_cleanup+0xfb/0x250 [mlx5_ib]
+ [ 2785.360781 ] mlx5_ib_stage_pre_ib_reg_umr_cleanup+0x16/0x30 [mlx5_ib]
+ [ 2785.361969 ] __mlx5_ib_remove+0x68/0x120 [mlx5_ib]
+ [ 2785.362960 ] mlx5r_remove+0x63/0x80 [mlx5_ib]
+ [ 2785.363870 ] auxiliary_bus_remove+0x52/0x70
+ [ 2785.364715 ] device_release_driver_internal+0x3c1/0x600
+ [ 2785.365695 ] bus_remove_device+0x2a5/0x560
+ [ 2785.366525 ] device_del+0x492/0xb80
+ [ 2785.367276 ] mlx5_detach_device+0x1a9/0x360 [mlx5_core]
+ [ 2785.368615 ] mlx5_unload_one_devl_locked+0x5a/0x110 [mlx5_core]
+ [ 2785.369934 ] mlx5_devlink_reload_down+0x292/0x580 [mlx5_core]
+ [ 2785.371292 ] devlink_reload+0x439/0x590
+ [ 2785.372075 ] devlink_nl_cmd_reload+0xaef/0xff0
+ [ 2785.372973 ] genl_family_rcv_msg_doit.isra.0+0x1bd/0x290
+ [ 2785.374011 ] genl_rcv_msg+0x3ca/0x6c0
+ [ 2785.374798 ] netlink_rcv_skb+0x12c/0x360
+ [ 2785.375612 ] genl_rcv+0x24/0x40
+ [ 2785.376295 ] netlink_unicast+0x438/0x710
+ [ 2785.377121 ] netlink_sendmsg+0x7a1/0xca0
+ [ 2785.377926 ] sock_sendmsg+0xc5/0x190
+ [ 2785.378668 ] __sys_sendto+0x1bc/0x290
+ [ 2785.379440 ] __x64_sys_sendto+0xdc/0x1b0
+ [ 2785.380255 ] do_syscall_64+0x3d/0x90
+ [ 2785.381031 ] entry_SYSCALL_64_after_hwframe+0x46/0xb0
+ [ 2785.381967 ]
+ [ 2785.381967 ] other info that might help us debug this:
+ [ 2785.381967 ]
+ [ 2785.383448 ] Possible unsafe locking scenario:
+ [ 2785.383448 ]
+ [ 2785.384544 ] CPU0 CPU1
+ [ 2785.385383 ] ---- ----
+ [ 2785.386193 ] lock(&dev->cache.rb_lock);
+ [ 2785.386940 ] lock((work_completion)(&(&ent->dwork)->work));
+ [ 2785.388327 ] lock(&dev->cache.rb_lock);
+ [ 2785.389425 ] lock((work_completion)(&(&ent->dwork)->work));
+ [ 2785.390414 ]
+ [ 2785.390414 ] *** DEADLOCK ***
+ [ 2785.390414 ]
+ [ 2785.391579 ] 6 locks held by devlink/53872:
+ [ 2785.392341 ] #0: ffffffff84c17a50 (cb_lock){++++}-{3:3}, at: genl_rcv+0x15/0x40
+ [ 2785.393630 ] #1: ffff888142280218 (&devlink->lock_key){+.+.}-{3:3}, at: devlink_get_from_attrs_lock+0x12d/0x2d0
+ [ 2785.395324 ] #2: ffff8881422d3c38 (&dev->lock_key){+.+.}-{3:3}, at: mlx5_unload_one_devl_locked+0x4a/0x110 [mlx5_core]
+ [ 2785.397322 ] #3: ffffffffa0e59068 (mlx5_intf_mutex){+.+.}-{3:3}, at: mlx5_detach_device+0x60/0x360 [mlx5_core]
+ [ 2785.399231 ] #4: ffff88810e3cb0e8 (&dev->mutex){....}-{3:3}, at: device_release_driver_internal+0x8d/0x600
+ [ 2785.400864 ] #5: ffff88817e8f1260 (&dev->cache.rb_lock){+.+.}-{3:3}, at: mlx5_mkey_cache_cleanup+0x77/0x250 [mlx5_ib]
+
+Fixes: b95845178328 ("RDMA/mlx5: Change the cache structure to an RB-tree")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 +
+ drivers/infiniband/hw/mlx5/mr.c | 16 ++++++++++++++--
+ 2 files changed, 15 insertions(+), 2 deletions(-)
+
+--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+@@ -821,6 +821,7 @@ struct mlx5_mkey_cache {
+ struct dentry *fs_root;
+ unsigned long last_add;
+ struct delayed_work remove_ent_dwork;
++ u8 disable: 1;
+ };
+
+ struct mlx5_ib_port_resources {
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -994,19 +994,27 @@ int mlx5_mkey_cache_cleanup(struct mlx5_
+ if (!dev->cache.wq)
+ return 0;
+
+- cancel_delayed_work_sync(&dev->cache.remove_ent_dwork);
+ mutex_lock(&dev->cache.rb_lock);
++ dev->cache.disable = true;
+ for (node = rb_first(root); node; node = rb_next(node)) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ent->disabled = true;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+- cancel_delayed_work_sync(&ent->dwork);
+ }
++ mutex_unlock(&dev->cache.rb_lock);
++
++ /*
++ * After all entries are disabled and will not reschedule on WQ,
++ * flush it and all async commands.
++ */
++ flush_workqueue(dev->cache.wq);
+
+ mlx5_mkey_cache_debugfs_cleanup(dev);
+ mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
+
++ /* At this point all entries are disabled and have no concurrent work. */
++ mutex_lock(&dev->cache.rb_lock);
+ node = rb_first(root);
+ while (node) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+@@ -1789,6 +1797,10 @@ static int cache_ent_find_and_store(stru
+ }
+
+ mutex_lock(&cache->rb_lock);
++ if (cache->disable) {
++ mutex_unlock(&cache->rb_lock);
++ return 0;
++ }
+ ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key);
+ if (ent) {
+ if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) {
--- /dev/null
+From 828cf5936bea2438c21a3a6c303b34a2a1f6c3c2 Mon Sep 17 00:00:00 2001
+From: Leon Romanovsky <leonro@nvidia.com>
+Date: Thu, 2 Feb 2023 11:03:06 +0200
+Subject: RDMA/mlx5: Fix MR cache debugfs error in IB representors mode
+
+From: Leon Romanovsky <leonro@nvidia.com>
+
+commit 828cf5936bea2438c21a3a6c303b34a2a1f6c3c2 upstream.
+
+Block MR cache debugfs creation in the IB representor flow, as the MR
+cache shouldn't be used at all in that mode. As part of this change, add
+the missing debugfs cleanup in the error path too.
+
+This change fixes the following debugfs errors:
+
+ bond0: (slave enp8s0f1): Enslaving as a backup interface with an up link
+ mlx5_core 0000:08:00.0: lag map: port 1:1 port 2:1
+ mlx5_core 0000:08:00.0: shared_fdb:1 mode:queue_affinity
+ mlx5_core 0000:08:00.0: Operation mode is single FDB
+ debugfs: Directory '2' with parent '/' already present!
+...
+ debugfs: Directory '22' with parent '/' already present!
+
+Fixes: 73d09b2fe833 ("RDMA/mlx5: Introduce mlx5r_cache_rb_key")
+Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
+Link: https://lore.kernel.org/r/482a78c54acbcfa1742a0e06a452546428900ffa.1675328463.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/mr.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -789,6 +789,9 @@ static void mlx5_mkey_cache_debugfs_add_
+ int order = order_base_2(ent->rb_key.ndescs);
+ struct dentry *dir;
+
++ if (!mlx5_debugfs_root || dev->is_rep)
++ return;
++
+ if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+ order = MLX5_IMR_KSM_CACHE_ENTRY + 2;
+
+@@ -977,6 +980,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_
+
+ err:
+ mutex_unlock(&cache->rb_lock);
++ mlx5_mkey_cache_debugfs_cleanup(dev);
+ mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
+ return ret;
+ }
--- /dev/null
+From f637040c3339a2ed8c12d65ad03f9552386e2fe7 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Tue, 28 May 2024 15:52:53 +0300
+Subject: RDMA/mlx5: Follow rb_key.ats when creating new mkeys
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit f637040c3339a2ed8c12d65ad03f9552386e2fe7 upstream.
+
+When a cache entry already exists but doesn't have any mkeys in it, the
+cache will automatically create a new one based on the specification in
+ent->rb_key.
+
+ent->ats was missed when creating the new mkey, so ma_translation_mode
+was not being set even though the entry requires it.
+
+Cc: stable@vger.kernel.org
+Fixes: 73d09b2fe833 ("RDMA/mlx5: Introduce mlx5r_cache_rb_key")
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
+Link: https://lore.kernel.org/r/7c5613458ecb89fbe5606b7aa4c8d990bdea5b9a.1716900410.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/mr.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -243,6 +243,7 @@ static void set_cache_mkc(struct mlx5_ca
+ MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2,
+ (ent->rb_key.access_mode >> 2) & 0x7);
++ MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats);
+
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ get_mkc_octo_size(ent->rb_key.access_mode,
--- /dev/null
+From ee6d57a2e13d11ce9050cfc3e3b69ef707a44a63 Mon Sep 17 00:00:00 2001
+From: Michael Guralnik <michaelgur@nvidia.com>
+Date: Tue, 3 Sep 2024 14:24:49 +0300
+Subject: RDMA/mlx5: Limit usage of over-sized mkeys from the MR cache
+
+From: Michael Guralnik <michaelgur@nvidia.com>
+
+commit ee6d57a2e13d11ce9050cfc3e3b69ef707a44a63 upstream.
+
+When searching the MR cache for suitable cache entries, don't use mkeys
+larger than twice the size required for the MR.
+This should ensure the usage of mkeys closer to the minimal required size
+and reduce memory waste.
+
+On driver init we create entries for mkeys with clear attributes and
+power-of-2 sizes from 4 to the max supported size.
+This solves the issue for anyone using mkeys that fit these
+requirements.
+
+In the use case where an MR is registered with different attributes,
+like an access flag we can't UMR, we'll create a new cache entry to store
+it upon dereg.
+Without this fix, any later registration with the same attributes and a
+smaller size will use the newly created cache entry and its mkeys,
+disregarding the memory waste of using mkeys larger than required.
+
+For example, one worst-case scenario is registering and deregistering a
+1GB mkey with ATS enabled, which causes the creation of a new cache
+entry to hold that type of mkey. A user registering a 4k MR with ATS
+will end up using the new cache entry and an mkey that can support a 1GB
+MR, thus wasting roughly 250k times more memory in the HW than actually
+needed.
+
+Additionally, allow all small registrations to use the smallest size
+cache entry that is initialized on driver load, even if its size is
+larger than twice the required size.
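+
+Worked numbers for the example above (assuming 4KiB pages): a 1GiB MR
+needs 1GiB / 4KiB = 262,144 descriptors while a 4KiB MR needs only 1,
+hence the ~250k-times overshoot. With this change the lookup is bounded
+by
+
+    ndescs_limit = max(rb_key.ndescs * 2,
+                       MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS /* 4 */);
+
+so a 1-descriptor MR may still take the smallest (4-descriptor)
+persistent entry, while e.g. a 1000-descriptor MR may use a
+1024-descriptor entry but not a 4096-descriptor one.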
+
+Fixes: 73d09b2fe833 ("RDMA/mlx5: Introduce mlx5r_cache_rb_key")
+Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
+Link: https://patch.msgid.link/8ba3a6e3748aace2026de8b83da03aba084f78f4.1725362530.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/mr.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -48,6 +48,7 @@ enum {
+ MAX_PENDING_REG_MR = 8,
+ };
+
++#define MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS 4
+ #define MLX5_UMR_ALIGN 2048
+
+ static void
+@@ -656,6 +657,7 @@ mkey_cache_ent_from_rb_key(struct mlx5_i
+ {
+ struct rb_node *node = dev->cache.rb_root.rb_node;
+ struct mlx5_cache_ent *cur, *smallest = NULL;
++ u64 ndescs_limit;
+ int cmp;
+
+ /*
+@@ -674,10 +676,18 @@ mkey_cache_ent_from_rb_key(struct mlx5_i
+ return cur;
+ }
+
++ /*
++ * Limit the usage of mkeys larger than twice the required size while
++ * also allowing the usage of smallest cache entry for small MRs.
++ */
++ ndescs_limit = max_t(u64, rb_key.ndescs * 2,
++ MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS);
++
+ return (smallest &&
+ smallest->rb_key.access_mode == rb_key.access_mode &&
+ smallest->rb_key.access_flags == rb_key.access_flags &&
+- smallest->rb_key.ats == rb_key.ats) ?
++ smallest->rb_key.ats == rb_key.ats &&
++ smallest->rb_key.ndescs <= ndescs_limit) ?
+ smallest :
+ NULL;
+ }
+@@ -958,7 +968,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_
+ mlx5_mkey_cache_debugfs_init(dev);
+ mutex_lock(&cache->rb_lock);
+ for (i = 0; i <= mkey_cache_max_order(dev); i++) {
+- rb_key.ndescs = 1 << (i + 2);
++ rb_key.ndescs = MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS << i;
+ ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
+ if (IS_ERR(ent)) {
+ ret = PTR_ERR(ent);
--- /dev/null
+From c1eb2512596fb3542357bb6c34c286f5e0374538 Mon Sep 17 00:00:00 2001
+From: Jason Gunthorpe <jgg@nvidia.com>
+Date: Tue, 28 May 2024 15:52:52 +0300
+Subject: RDMA/mlx5: Remove extra unlock on error path
+
+From: Jason Gunthorpe <jgg@nvidia.com>
+
+commit c1eb2512596fb3542357bb6c34c286f5e0374538 upstream.
+
+The commit below lifted the locking out of this function but left this
+error-path unlock behind, resulting in unbalanced locking. Remove the
+missed unlock too.
+
+Cc: stable@vger.kernel.org
+Fixes: 627122280c87 ("RDMA/mlx5: Add work to remove temporary entries from the cache")
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
+Link: https://lore.kernel.org/r/78090c210c750f47219b95248f9f782f34548bb1.1716900410.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/mr.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -638,10 +638,8 @@ static int mlx5_cache_ent_insert(struct
+ new = &((*new)->rb_left);
+ if (cmp < 0)
+ new = &((*new)->rb_right);
+- if (cmp == 0) {
+- mutex_unlock(&cache->rb_lock);
++ if (cmp == 0)
+ return -EEXIST;
+- }
+ }
+
+ /* Add new node and rebalance tree. */
--- /dev/null
+From 0611a8e8b475fc5230b9a24d29c8397aaab20b63 Mon Sep 17 00:00:00 2001
+From: Or Har-Toov <ohartoov@nvidia.com>
+Date: Wed, 3 Apr 2024 13:35:59 +0300
+Subject: RDMA/mlx5: Uncacheable mkey has neither rb_key or cache_ent
+
+From: Or Har-Toov <ohartoov@nvidia.com>
+
+commit 0611a8e8b475fc5230b9a24d29c8397aaab20b63 upstream.
+
+Some mkeys can't be modified with UMR due to UMR limitations, such as
+the size of the translation that can be updated, so not all user mkeys
+can be cached.
+
+Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow")
+Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
+Link: https://lore.kernel.org/r/f2742dd934ed73b2d32c66afb8e91b823063880c.1712140377.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+@@ -651,7 +651,7 @@ struct mlx5_ib_mkey {
+ unsigned int ndescs;
+ struct wait_queue_head wait;
+ refcount_t usecount;
+- /* User Mkey must hold either a rb_key or a cache_ent. */
++ /* Cacheable user Mkey must hold either a rb_key or a cache_ent. */
+ struct mlx5r_cache_rb_key rb_key;
+ struct mlx5_cache_ent *cache_ent;
+ };
squashfs-check-the-inode-number-is-not-the-invalid-value-of-zero.patch
pfifo_tail_enqueue-drop-new-packet-when-sch-limit-0.patch
media-mtk-vcodec-potential-null-pointer-deference-in-scp.patch
+rdma-mlx5-fix-mr-cache-debugfs-error-in-ib-representors-mode.patch
+rdma-mlx5-check-reg_create-create-for-errors.patch
+rdma-mlx5-fix-mkey-cache-possible-deadlock-on-cleanup.patch
+rdma-mlx5-fix-assigning-access-flags-to-cache-mkeys.patch
+rdma-mlx5-uncacheable-mkey-has-neither-rb_key-or-cache_ent.patch
+rdma-mlx5-change-check-for-cacheable-mkeys.patch
+rdma-mlx5-remove-extra-unlock-on-error-path.patch
+rdma-mlx5-follow-rb_key.ats-when-creating-new-mkeys.patch
+rdma-mlx5-ensure-created-mkeys-always-have-a-populated-rb_key.patch
+rdma-mlx5-fix-counter-update-on-mr-cache-mkey-creation.patch
+rdma-mlx5-limit-usage-of-over-sized-mkeys-from-the-mr-cache.patch