git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.13
author    Sasha Levin <sashal@kernel.org>  Fri, 28 Feb 2025 04:41:20 +0000 (23:41 -0500)
committer Sasha Levin <sashal@kernel.org>  Fri, 28 Feb 2025 04:41:20 +0000 (23:41 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
25 files changed:
queue-6.13/ib-mlx5-set-and-get-correct-qp_num-for-a-dct-qp.patch [new file with mode: 0644]
queue-6.13/landlock-fix-non-tcp-sockets-restriction.patch [new file with mode: 0644]
queue-6.13/nfs-adjust-delegated-timestamps-for-o_direct-reads-a.patch [new file with mode: 0644]
queue-6.13/nfs-o_direct-writes-must-check-and-adjust-the-file-l.patch [new file with mode: 0644]
queue-6.13/nfsv4-fix-a-deadlock-when-recovering-state-on-a-sill.patch [new file with mode: 0644]
queue-6.13/ovl-fix-uaf-in-ovl_dentry_update_reval-by-moving-dpu.patch [new file with mode: 0644]
queue-6.13/rdma-bnxt_re-add-sanity-checks-on-rdev-validity.patch [new file with mode: 0644]
queue-6.13/rdma-bnxt_re-allocate-dev_attr-information-dynamical.patch [new file with mode: 0644]
queue-6.13/rdma-bnxt_re-fix-the-page-details-for-the-srq-create.patch [new file with mode: 0644]
queue-6.13/rdma-bnxt_re-fix-the-statistics-for-gen-p7-vf.patch [new file with mode: 0644]
queue-6.13/rdma-hns-fix-mbox-timing-out-by-adding-retry-mechani.patch [new file with mode: 0644]
queue-6.13/rdma-mana_ib-allocate-page-aligned-doorbell-index.patch [new file with mode: 0644]
queue-6.13/rdma-mlx5-fix-a-race-for-dmabuf-mr-which-can-lead-to.patch [new file with mode: 0644]
queue-6.13/rdma-mlx5-fix-a-warn-during-dereg_mr-for-dm-type.patch [new file with mode: 0644]
queue-6.13/rdma-mlx5-fix-ah-static-rate-parsing.patch [new file with mode: 0644]
queue-6.13/rdma-mlx5-fix-bind-qp-error-cleanup-flow.patch [new file with mode: 0644]
queue-6.13/rdma-mlx5-fix-implicit-odp-hang-on-parent-deregistra.patch [new file with mode: 0644]
queue-6.13/rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch [new file with mode: 0644]
queue-6.13/scsi-core-clear-driver-private-data-when-retrying-re.patch [new file with mode: 0644]
queue-6.13/scsi-ufs-core-fix-ufshcd_is_ufs_dev_busy-and-ufshcd_.patch [new file with mode: 0644]
queue-6.13/scsi-ufs-core-set-default-runtime-system-pm-levels-b.patch [new file with mode: 0644]
queue-6.13/series [new file with mode: 0644]
queue-6.13/sunrpc-handle-etimedout-return-from-tlshd.patch [new file with mode: 0644]
queue-6.13/sunrpc-prevent-looping-due-to-rpc_signal_task-races.patch [new file with mode: 0644]
queue-6.13/sunrpc-suppress-warnings-for-unused-procfs-functions.patch [new file with mode: 0644]

diff --git a/queue-6.13/ib-mlx5-set-and-get-correct-qp_num-for-a-dct-qp.patch b/queue-6.13/ib-mlx5-set-and-get-correct-qp_num-for-a-dct-qp.patch
new file mode 100644 (file)
index 0000000..bca504a
--- /dev/null
@@ -0,0 +1,50 @@
+From ee857ac8a1124a5c61f50a14a537eff9c6f0b30d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 19 Jan 2025 14:39:46 +0200
+Subject: IB/mlx5: Set and get correct qp_num for a DCT QP
+
+From: Mark Zhang <markzhang@nvidia.com>
+
+[ Upstream commit 12d044770e12c4205fa69535b4fa8a9981fea98f ]
+
+When a DCT QP is created on an active lag, its dctc.port is assigned
+in a round-robin way, ranging from 1 to dev->lag_port. In this case,
+when querying this QP, we may get qp_attr.port_num > 2.
+Fix this by setting qp->port when modifying a DCT QP, and reading port_num
+from qp->port instead of dctc.port when querying it.
+
+Fixes: 7c4b1ab9f167 ("IB/mlx5: Add DCT RoCE LAG support")
+Signed-off-by: Mark Zhang <markzhang@nvidia.com>
+Reviewed-by: Maher Sanalla <msanalla@nvidia.com>
+Link: https://patch.msgid.link/94c76bf0adbea997f87ffa27674e0a7118ad92a9.1737290358.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mlx5/qp.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index a43eba9d3572c..08d22db8dca91 100644
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -4579,6 +4579,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+               set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1);
+               MLX5_SET(dctc, dctc, counter_set_id, set_id);
++
++              qp->port = attr->port_num;
+       } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+               struct mlx5_ib_modify_qp_resp resp = {};
+               u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {};
+@@ -5074,7 +5076,7 @@ static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp,
+       }
+       if (qp_attr_mask & IB_QP_PORT)
+-              qp_attr->port_num = MLX5_GET(dctc, dctc, port);
++              qp_attr->port_num = mqp->port;
+       if (qp_attr_mask & IB_QP_MIN_RNR_TIMER)
+               qp_attr->min_rnr_timer = MLX5_GET(dctc, dctc, min_rnr_nak);
+       if (qp_attr_mask & IB_QP_AV) {
+-- 
+2.39.5
+
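A minimal user-space C model of the fix above (hypothetical names; a two-port LAG stands in for dev->lag_port): query must report the port cached at modify time, not the round-robin hardware field.

    #include <stdio.h>

    struct dct_qp {
            int dctc_port;  /* hardware field: LAG port picked round-robin */
            int port;       /* cached copy of the port set at modify time */
    };

    static int next_lag_port = 2;   /* round-robin cursor */

    static void modify_qp(struct dct_qp *qp, int requested_port)
    {
            /* the LAG assigns dctc.port itself, ignoring the request */
            qp->dctc_port = next_lag_port;
            next_lag_port = (next_lag_port % 2) + 1;
            qp->port = requested_port;  /* the fix: remember the caller's port */
    }

    static int query_qp(const struct dct_qp *qp)
    {
            /* pre-fix code returned qp->dctc_port here, which need not
             * match the port the caller configured */
            return qp->port;
    }

    int main(void)
    {
            struct dct_qp qp;

            modify_qp(&qp, 1);
            printf("port_num=%d (hw picked %d)\n", query_qp(&qp), qp.dctc_port);
            return 0;
    }
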
diff --git a/queue-6.13/landlock-fix-non-tcp-sockets-restriction.patch b/queue-6.13/landlock-fix-non-tcp-sockets-restriction.patch
new file mode 100644 (file)
index 0000000..e1c91f6
--- /dev/null
@@ -0,0 +1,64 @@
+From 0fd8fa6a48e05bdb41f2d896f75adbb3d8b88ea0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Feb 2025 17:36:49 +0800
+Subject: landlock: Fix non-TCP sockets restriction
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mikhail Ivanov <ivanov.mikhail1@huawei-partners.com>
+
+[ Upstream commit 854277e2cc8c75dc3c216c82e72523258fcf65b9 ]
+
+Use sk_is_tcp() to check if a socket is TCP in the bind(2) and connect(2)
+hooks.
+
+SMC, MPTCP, SCTP protocols are currently restricted by TCP access
+rights.  The purpose of TCP access rights is to provide control over
+ports that can be used by userland to establish a TCP connection.
+Therefore, it is incorrect to deny bind(2) and connect(2) requests for a
+socket of another protocol.
+
+However, SMC, MPTCP and RDS implementations use TCP internal sockets to
+establish communication or even to exchange packets over a TCP
+connection [1]. Landlock rules that configure bind(2) and connect(2)
+usage for TCP sockets should not cover requests for sockets of such
+protocols. These protocols have a different set of security issues and
+security properties; therefore, it is necessary to provide userland
+with the ability to distinguish between them (e.g. [2]).
+
+Control over TCP connection used by other protocols can be achieved with
+upcoming support of socket creation control [3].
+
+[1] https://lore.kernel.org/all/62336067-18c2-3493-d0ec-6dd6a6d3a1b5@huawei-partners.com/
+[2] https://lore.kernel.org/all/20241204.fahVio7eicim@digikod.net/
+[3] https://lore.kernel.org/all/20240904104824.1844082-1-ivanov.mikhail1@huawei-partners.com/
+
+Closes: https://github.com/landlock-lsm/linux/issues/40
+Fixes: fff69fb03dde ("landlock: Support network rules with TCP bind and connect")
+Signed-off-by: Mikhail Ivanov <ivanov.mikhail1@huawei-partners.com>
+Link: https://lore.kernel.org/r/20250205093651.1424339-2-ivanov.mikhail1@huawei-partners.com
+[mic: Format commit message to 72 columns]
+Signed-off-by: Mickaël Salaün <mic@digikod.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ security/landlock/net.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/security/landlock/net.c b/security/landlock/net.c
+index d5dcc4407a197..104b6c01fe503 100644
+--- a/security/landlock/net.c
++++ b/security/landlock/net.c
+@@ -63,8 +63,7 @@ static int current_check_access_socket(struct socket *const sock,
+       if (WARN_ON_ONCE(dom->num_layers < 1))
+               return -EACCES;
+-      /* Checks if it's a (potential) TCP socket. */
+-      if (sock->type != SOCK_STREAM)
++      if (!sk_is_tcp(sock->sk))
+               return 0;
+       /* Checks for minimal header length to safely read sa_family. */
+-- 
+2.39.5
+
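The one-line change swaps a socket-type check for a protocol check. A self-contained sketch of the difference, with stand-in constants instead of kernel headers (sk_is_tcp() checks both the type and the protocol, as modeled here):

    #include <stdbool.h>
    #include <stdio.h>

    struct sock_m { int type; int protocol; };

    enum { M_SOCK_STREAM = 1 };
    enum { M_IPPROTO_TCP = 6, M_IPPROTO_MPTCP = 262 };

    static bool old_check(const struct sock_m *s)
    {
            return s->type == M_SOCK_STREAM;        /* too broad */
    }

    static bool new_check(const struct sock_m *s)
    {
            return s->type == M_SOCK_STREAM &&
                   s->protocol == M_IPPROTO_TCP;    /* actual TCP only */
    }

    int main(void)
    {
            struct sock_m mptcp = { M_SOCK_STREAM, M_IPPROTO_MPTCP };

            /* old: MPTCP hits the TCP rules; new: it is left alone */
            printf("old=%d new=%d\n", old_check(&mptcp), new_check(&mptcp));
            return 0;
    }
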
diff --git a/queue-6.13/nfs-adjust-delegated-timestamps-for-o_direct-reads-a.patch b/queue-6.13/nfs-adjust-delegated-timestamps-for-o_direct-reads-a.patch
new file mode 100644 (file)
index 0000000..562fb91
--- /dev/null
@@ -0,0 +1,52 @@
+From 306f3ce15cb7bc53ff21c8d20edf8b6208ce65ee Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 1 Feb 2025 14:59:03 -0500
+Subject: NFS: Adjust delegated timestamps for O_DIRECT reads and writes
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 88025c67fe3c025a0123bc7af50535b97f7af89a ]
+
+Adjust the timestamps if O_DIRECT is being combined with attribute
+delegations.
+
+Fixes: e12912d94137 ("NFSv4: Add support for delegated atime and mtime attributes")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/direct.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
+index 8f2aa2bf45659..6a6e758841336 100644
+--- a/fs/nfs/direct.c
++++ b/fs/nfs/direct.c
+@@ -56,6 +56,7 @@
+ #include <linux/uaccess.h>
+ #include <linux/atomic.h>
++#include "delegation.h"
+ #include "internal.h"
+ #include "iostat.h"
+ #include "pnfs.h"
+@@ -286,6 +287,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
+       nfs_direct_count_bytes(dreq, hdr);
+       spin_unlock(&dreq->lock);
++      nfs_update_delegated_atime(dreq->inode);
++
+       while (!list_empty(&hdr->pages)) {
+               struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+               struct page *page = req->wb_page;
+@@ -778,6 +781,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
+       spin_lock(&inode->i_lock);
+       nfs_direct_file_adjust_size_locked(inode, dreq->io_start, dreq->count);
++      nfs_update_delegated_mtime_locked(dreq->inode);
+       spin_unlock(&inode->i_lock);
+       while (!list_empty(&hdr->pages)) {
+-- 
+2.39.5
+
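A rough user-space model of what the two added calls accomplish: while an attribute delegation is held, the client updates its cached timestamps itself when O_DIRECT I/O completes (all names here are hypothetical):

    #include <stdio.h>
    #include <time.h>

    struct inode_m { time_t atime, mtime; int delegated; };

    static void direct_read_done(struct inode_m *i)
    {
            if (i->delegated)
                    i->atime = time(NULL);  /* nfs_update_delegated_atime() */
    }

    static void direct_write_done(struct inode_m *i)
    {
            if (i->delegated)
                    i->mtime = time(NULL);  /* nfs_update_delegated_mtime_locked() */
    }

    int main(void)
    {
            struct inode_m i = { 0, 0, 1 };

            direct_read_done(&i);
            direct_write_done(&i);
            printf("atime=%ld mtime=%ld\n", (long)i.atime, (long)i.mtime);
            return 0;
    }
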
diff --git a/queue-6.13/nfs-o_direct-writes-must-check-and-adjust-the-file-l.patch b/queue-6.13/nfs-o_direct-writes-must-check-and-adjust-the-file-l.patch
new file mode 100644 (file)
index 0000000..aea52d8
--- /dev/null
@@ -0,0 +1,72 @@
+From 1321c67fc068c270ac22d55313c363ed20c1d9c5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 1 Feb 2025 14:59:02 -0500
+Subject: NFS: O_DIRECT writes must check and adjust the file length
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit fcf857ee1958e9247298251f7615d0c76f1e9b38 ]
+
+While it is uncommon for delegations to be held while O_DIRECT writes
+are in progress, it is possible. The xfstests generic/647 and
+generic/729 both end up triggering that state, and fail because the
+file size is not adjusted.
+
+Reported-by: Chuck Lever <chuck.lever@oracle.com>
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=219738
+Cc: stable@vger.kernel.org
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
+Stable-dep-of: 88025c67fe3c ("NFS: Adjust delegated timestamps for O_DIRECT reads and writes")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/direct.c | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
+index b08dbe96bc579..8f2aa2bf45659 100644
+--- a/fs/nfs/direct.c
++++ b/fs/nfs/direct.c
+@@ -130,6 +130,20 @@ static void nfs_direct_truncate_request(struct nfs_direct_req *dreq,
+               dreq->count = req_start;
+ }
++static void nfs_direct_file_adjust_size_locked(struct inode *inode,
++                                             loff_t offset, size_t count)
++{
++      loff_t newsize = offset + (loff_t)count;
++      loff_t oldsize = i_size_read(inode);
++
++      if (newsize > oldsize) {
++              i_size_write(inode, newsize);
++              NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
++              trace_nfs_size_grow(inode, newsize);
++              nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
++      }
++}
++
+ /**
+  * nfs_swap_rw - NFS address space operation for swap I/O
+  * @iocb: target I/O control block
+@@ -740,6 +754,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
+       struct nfs_direct_req *dreq = hdr->dreq;
+       struct nfs_commit_info cinfo;
+       struct nfs_page *req = nfs_list_entry(hdr->pages.next);
++      struct inode *inode = dreq->inode;
+       int flags = NFS_ODIRECT_DONE;
+       trace_nfs_direct_write_completion(dreq);
+@@ -761,6 +776,10 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
+       }
+       spin_unlock(&dreq->lock);
++      spin_lock(&inode->i_lock);
++      nfs_direct_file_adjust_size_locked(inode, dreq->io_start, dreq->count);
++      spin_unlock(&inode->i_lock);
++
+       while (!list_empty(&hdr->pages)) {
+               req = nfs_list_entry(hdr->pages.next);
+-- 
+2.39.5
+
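The new nfs_direct_file_adjust_size_locked() helper reduces to a grow-only size update; a standalone sketch without the inode locking, cache-validity flags and tracepoints:

    #include <stddef.h>
    #include <stdio.h>

    /* grow the cached file size when a write ends past the current EOF */
    static void adjust_size(long long *i_size, long long offset, size_t count)
    {
            long long newsize = offset + (long long)count;

            if (newsize > *i_size)
                    *i_size = newsize;
    }

    int main(void)
    {
            long long i_size = 4096;

            adjust_size(&i_size, 8192, 512);   /* O_DIRECT write past EOF */
            printf("i_size=%lld\n", i_size);   /* 8704 */
            return 0;
    }
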
diff --git a/queue-6.13/nfsv4-fix-a-deadlock-when-recovering-state-on-a-sill.patch b/queue-6.13/nfsv4-fix-a-deadlock-when-recovering-state-on-a-sill.patch
new file mode 100644 (file)
index 0000000..b2f6486
--- /dev/null
@@ -0,0 +1,108 @@
+From 9edd888627536937dfa4787ec12858e2bd66dda2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 1 Feb 2025 15:00:09 -0500
+Subject: NFSv4: Fix a deadlock when recovering state on a sillyrenamed file
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 8f8df955f078e1a023ee55161935000a67651f38 ]
+
+If the file is sillyrenamed, and slated for delete on close, it is
+possible for a server reboot to trigger an open reclaim, which can again
+race with the application call to close(). When that happens, the call
+to put_nfs_open_context() can trigger a synchronous delegreturn call
+which deadlocks because it is not marked as privileged.
+
+Instead, ensure that the call to nfs4_inode_return_delegation_on_close()
+catches the delegreturn, and schedules it asynchronously.
+
+Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
+Fixes: adb4b42d19ae ("Return the delegation when deleting sillyrenamed files")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/delegation.c | 37 +++++++++++++++++++++++++++++++++++++
+ fs/nfs/delegation.h |  1 +
+ fs/nfs/nfs4proc.c   |  3 +++
+ 3 files changed, 41 insertions(+)
+
+diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
+index 035ba52742a50..4db912f562305 100644
+--- a/fs/nfs/delegation.c
++++ b/fs/nfs/delegation.c
+@@ -780,6 +780,43 @@ int nfs4_inode_return_delegation(struct inode *inode)
+       return 0;
+ }
++/**
++ * nfs4_inode_set_return_delegation_on_close - asynchronously return a delegation
++ * @inode: inode to process
++ *
++ * This routine is called to request that the delegation be returned as soon
++ * as the file is closed. If the file is already closed, the delegation is
++ * immediately returned.
++ */
++void nfs4_inode_set_return_delegation_on_close(struct inode *inode)
++{
++      struct nfs_delegation *delegation;
++      struct nfs_delegation *ret = NULL;
++
++      if (!inode)
++              return;
++      rcu_read_lock();
++      delegation = nfs4_get_valid_delegation(inode);
++      if (!delegation)
++              goto out;
++      spin_lock(&delegation->lock);
++      if (!delegation->inode)
++              goto out_unlock;
++      if (list_empty(&NFS_I(inode)->open_files) &&
++          !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
++              /* Refcount matched in nfs_end_delegation_return() */
++              ret = nfs_get_delegation(delegation);
++      } else
++              set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
++out_unlock:
++      spin_unlock(&delegation->lock);
++      if (ret)
++              nfs_clear_verifier_delegated(inode);
++out:
++      rcu_read_unlock();
++      nfs_end_delegation_return(inode, ret, 0);
++}
++
+ /**
+  * nfs4_inode_return_delegation_on_close - asynchronously return a delegation
+  * @inode: inode to process
+diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
+index 71524d34ed207..8ff5ab9c5c256 100644
+--- a/fs/nfs/delegation.h
++++ b/fs/nfs/delegation.h
+@@ -49,6 +49,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
+                                 unsigned long pagemod_limit, u32 deleg_type);
+ int nfs4_inode_return_delegation(struct inode *inode);
+ void nfs4_inode_return_delegation_on_close(struct inode *inode);
++void nfs4_inode_set_return_delegation_on_close(struct inode *inode);
+ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
+ void nfs_inode_evict_delegation(struct inode *inode);
+diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
+index 405f17e6e0b45..e7bc99c69743c 100644
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -3898,8 +3898,11 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx,
+ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
+ {
++      struct dentry *dentry = ctx->dentry;
+       if (ctx->state == NULL)
+               return;
++      if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
++              nfs4_inode_set_return_delegation_on_close(d_inode(dentry));
+       if (is_sync)
+               nfs4_close_sync(ctx->state, _nfs4_ctx_to_openmode(ctx));
+       else
+-- 
+2.39.5
+
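A compact model of the close-time logic: if other opens remain, only a flag is set and the last close performs the return; this is a sketch of the idea, not the kernel's delegation machinery:

    #include <stdbool.h>
    #include <stdio.h>

    struct inode_m { int open_files; bool deleg; bool return_on_close; };

    static void set_return_delegation_on_close(struct inode_m *i)
    {
            if (!i->deleg)
                    return;
            if (i->open_files == 0)
                    i->deleg = false;           /* return immediately */
            else
                    i->return_on_close = true;  /* defer to the last close */
    }

    static void close_file(struct inode_m *i)
    {
            if (--i->open_files == 0 && i->return_on_close)
                    i->deleg = false;           /* asynchronous return point */
    }

    int main(void)
    {
            struct inode_m i = { .open_files = 1, .deleg = true };

            set_return_delegation_on_close(&i);
            close_file(&i);
            printf("delegation held: %d\n", i.deleg);
            return 0;
    }
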
diff --git a/queue-6.13/ovl-fix-uaf-in-ovl_dentry_update_reval-by-moving-dpu.patch b/queue-6.13/ovl-fix-uaf-in-ovl_dentry_update_reval-by-moving-dpu.patch
new file mode 100644 (file)
index 0000000..964fc07
--- /dev/null
@@ -0,0 +1,71 @@
+From d15e47d71bc6364310192d85fef40cbb0130a37e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 15 Feb 2025 00:51:48 +0300
+Subject: ovl: fix UAF in ovl_dentry_update_reval by moving dput() in
+ ovl_link_up
+
+From: Vasiliy Kovalev <kovalev@altlinux.org>
+
+[ Upstream commit c84e125fff2615b4d9c259e762596134eddd2f27 ]
+
+The issue was caused by dput(upper) being called before
+ovl_dentry_update_reval(), while upper->d_flags was still
+accessed in ovl_dentry_remote().
+
+Move dput(upper) after its last use to prevent use-after-free.
+
+BUG: KASAN: slab-use-after-free in ovl_dentry_remote fs/overlayfs/util.c:162 [inline]
+BUG: KASAN: slab-use-after-free in ovl_dentry_update_reval+0xd2/0xf0 fs/overlayfs/util.c:167
+
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:114
+ print_address_description mm/kasan/report.c:377 [inline]
+ print_report+0xc3/0x620 mm/kasan/report.c:488
+ kasan_report+0xd9/0x110 mm/kasan/report.c:601
+ ovl_dentry_remote fs/overlayfs/util.c:162 [inline]
+ ovl_dentry_update_reval+0xd2/0xf0 fs/overlayfs/util.c:167
+ ovl_link_up fs/overlayfs/copy_up.c:610 [inline]
+ ovl_copy_up_one+0x2105/0x3490 fs/overlayfs/copy_up.c:1170
+ ovl_copy_up_flags+0x18d/0x200 fs/overlayfs/copy_up.c:1223
+ ovl_rename+0x39e/0x18c0 fs/overlayfs/dir.c:1136
+ vfs_rename+0xf84/0x20a0 fs/namei.c:4893
+...
+ </TASK>
+
+Fixes: b07d5cc93e1b ("ovl: update of dentry revalidate flags after copy up")
+Reported-by: syzbot+316db8a1191938280eb6@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=316db8a1191938280eb6
+Signed-off-by: Vasiliy Kovalev <kovalev@altlinux.org>
+Link: https://lore.kernel.org/r/20250214215148.761147-1-kovalev@altlinux.org
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/overlayfs/copy_up.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
+index 0c28e5fa34077..d7310fcf38881 100644
+--- a/fs/overlayfs/copy_up.c
++++ b/fs/overlayfs/copy_up.c
+@@ -618,7 +618,6 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
+       err = PTR_ERR(upper);
+       if (!IS_ERR(upper)) {
+               err = ovl_do_link(ofs, ovl_dentry_upper(c->dentry), udir, upper);
+-              dput(upper);
+               if (!err) {
+                       /* Restore timestamps on parent (best effort) */
+@@ -626,6 +625,7 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
+                       ovl_dentry_set_upper_alias(c->dentry);
+                       ovl_dentry_update_reval(c->dentry, upper);
+               }
++              dput(upper);
+       }
+       inode_unlock(udir);
+       if (err)
+-- 
+2.39.5
+
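The bug is a plain reference-count ordering error. A user-space sketch with a hypothetical refcounted object shows why the dput() (here put()) must follow the last field access:

    #include <stdio.h>
    #include <stdlib.h>

    struct obj { int refcount; int flags; };

    static void put(struct obj *o)
    {
            if (--o->refcount == 0)
                    free(o);        /* last reference: object is gone */
    }

    int main(void)
    {
            struct obj *upper = malloc(sizeof(*upper));

            upper->refcount = 1;
            upper->flags = 0x42;

            /* buggy order: put(upper) here, then reading upper->flags
             * below is a use-after-free if that was the last reference.
             * The fix moves the put after the last use: */
            printf("flags=%#x\n", upper->flags);
            put(upper);
            return 0;
    }
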
diff --git a/queue-6.13/rdma-bnxt_re-add-sanity-checks-on-rdev-validity.patch b/queue-6.13/rdma-bnxt_re-add-sanity-checks-on-rdev-validity.patch
new file mode 100644 (file)
index 0000000..7327421
--- /dev/null
@@ -0,0 +1,57 @@
+From 0a2f5037c9e5dec2212bf34a830cf40b61c3a1fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Feb 2025 00:21:23 -0800
+Subject: RDMA/bnxt_re: Add sanity checks on rdev validity
+
+From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
+
+[ Upstream commit f0df225d12fcb049429fb5bf5122afe143c2dd15 ]
+
+There is a possibility that the ulp_irq_stop and ulp_irq_start
+callbacks will be called when the device is in a detached state.
+This can cause a crash due to a NULL pointer dereference as
+the rdev has already been freed.
+
+Fixes: cc5b9b48d447 ("RDMA/bnxt_re: Recover the device when FW error is detected")
+Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
+Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
+Link: https://patch.msgid.link/1738657285-23968-3-git-send-email-selvin.xavier@broadcom.com
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/bnxt_re/main.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
+index c143f273b7596..7368b0482bb87 100644
+--- a/drivers/infiniband/hw/bnxt_re/main.c
++++ b/drivers/infiniband/hw/bnxt_re/main.c
+@@ -321,6 +321,8 @@ static void bnxt_re_stop_irq(void *handle)
+       int indx;
+       rdev = en_info->rdev;
++      if (!rdev)
++              return;
+       rcfw = &rdev->rcfw;
+       for (indx = BNXT_RE_NQ_IDX; indx < rdev->nqr->num_msix; indx++) {
+@@ -341,6 +343,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
+       int indx, rc;
+       rdev = en_info->rdev;
++      if (!rdev)
++              return;
+       msix_ent = rdev->nqr->msix_entries;
+       rcfw = &rdev->rcfw;
+       if (!ent) {
+@@ -2356,6 +2360,7 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state)
+       ibdev_info(&rdev->ibdev, "%s: L2 driver notified to stop en_state 0x%lx",
+                  __func__, en_dev->en_state);
+       bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, adev);
++      bnxt_re_update_en_info_rdev(NULL, en_info, adev);
+       mutex_unlock(&bnxt_re_mutex);
+       return 0;
+-- 
+2.39.5
+
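The guard itself is the whole fix. A trivial model of a callback tolerating a detached device (hypothetical types):

    #include <stdio.h>

    struct rdev_m { int id; };
    struct en_info_m { struct rdev_m *rdev; };

    static void stop_irq(struct en_info_m *en_info)
    {
            struct rdev_m *rdev = en_info->rdev;

            if (!rdev)
                    return;   /* device detached: nothing to stop */
            printf("stopping irqs for dev %d\n", rdev->id);
    }

    int main(void)
    {
            struct en_info_m info = { .rdev = NULL };  /* detached state */

            stop_irq(&info);   /* returns safely instead of crashing */
            return 0;
    }
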
diff --git a/queue-6.13/rdma-bnxt_re-allocate-dev_attr-information-dynamical.patch b/queue-6.13/rdma-bnxt_re-allocate-dev_attr-information-dynamical.patch
new file mode 100644 (file)
index 0000000..e44b67c
--- /dev/null
@@ -0,0 +1,399 @@
+From ab30486eb656ea09d6f0f4181f55e243db27af21 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Jan 2025 10:18:13 -0800
+Subject: RDMA/bnxt_re: Allocate dev_attr information dynamically
+
+From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
+
+[ Upstream commit 9264cd6aa8f194753507cb6e1f444141e7c79f48 ]
+
+In order to optimize the size of the driver's private structure,
+the memory for dev_attr is allocated dynamically during
+chip context initialization. To allow certain runtime decisions
+to be made, store dev_attr in the qplib_res structure.
+
+Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
+Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
+Link: https://patch.msgid.link/1736446693-6692-3-git-send-email-selvin.xavier@broadcom.com
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Stable-dep-of: 8238c7bd8420 ("RDMA/bnxt_re: Fix the statistics for Gen P7 VF")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/bnxt_re/bnxt_re.h     |  2 +-
+ drivers/infiniband/hw/bnxt_re/hw_counters.c |  2 +-
+ drivers/infiniband/hw/bnxt_re/ib_verbs.c    | 38 ++++++++++-----------
+ drivers/infiniband/hw/bnxt_re/main.c        | 36 ++++++++++++-------
+ drivers/infiniband/hw/bnxt_re/qplib_res.c   |  7 ++--
+ drivers/infiniband/hw/bnxt_re/qplib_res.h   |  4 +--
+ drivers/infiniband/hw/bnxt_re/qplib_sp.c    |  4 +--
+ drivers/infiniband/hw/bnxt_re/qplib_sp.h    |  3 +-
+ 8 files changed, 51 insertions(+), 45 deletions(-)
+
+diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+index 2975b11b79bf7..22c98c155bd3c 100644
+--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
++++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+@@ -204,7 +204,7 @@ struct bnxt_re_dev {
+       struct bnxt_re_nq_record        *nqr;
+       /* Device Resources */
+-      struct bnxt_qplib_dev_attr      dev_attr;
++      struct bnxt_qplib_dev_attr      *dev_attr;
+       struct bnxt_qplib_ctx           qplib_ctx;
+       struct bnxt_qplib_res           qplib_res;
+       struct bnxt_qplib_dpi           dpi_privileged;
+diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
+index 1e63f80917483..656c150e38e6f 100644
+--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
++++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
+@@ -357,7 +357,7 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
+                       goto done;
+               }
+               bnxt_re_copy_err_stats(rdev, stats, err_s);
+-              if (_is_ext_stats_supported(rdev->dev_attr.dev_cap_flags) &&
++              if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags) &&
+                   !rdev->is_virtfn) {
+                       rc = bnxt_re_get_ext_stat(rdev, stats);
+                       if (rc) {
+diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+index 1ff2e176b0369..4b61867188c4c 100644
+--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
++++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+@@ -161,7 +161,7 @@ static int __qp_access_flags_to_ib(struct bnxt_qplib_chip_ctx *cctx, u8 qflags)
+ static void bnxt_re_check_and_set_relaxed_ordering(struct bnxt_re_dev *rdev,
+                                                  struct bnxt_qplib_mrw *qplib_mr)
+ {
+-      if (_is_relaxed_ordering_supported(rdev->dev_attr.dev_cap_flags2) &&
++      if (_is_relaxed_ordering_supported(rdev->dev_attr->dev_cap_flags2) &&
+           pcie_relaxed_ordering_enabled(rdev->en_dev->pdev))
+               qplib_mr->flags |= CMDQ_REGISTER_MR_FLAGS_ENABLE_RO;
+ }
+@@ -186,7 +186,7 @@ int bnxt_re_query_device(struct ib_device *ibdev,
+                        struct ib_udata *udata)
+ {
+       struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+-      struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
++      struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
+       memset(ib_attr, 0, sizeof(*ib_attr));
+       memcpy(&ib_attr->fw_ver, dev_attr->fw_ver,
+@@ -275,7 +275,7 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
+                      struct ib_port_attr *port_attr)
+ {
+       struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+-      struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
++      struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
+       int rc;
+       memset(port_attr, 0, sizeof(*port_attr));
+@@ -333,8 +333,8 @@ void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str)
+       struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+       snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d",
+-               rdev->dev_attr.fw_ver[0], rdev->dev_attr.fw_ver[1],
+-               rdev->dev_attr.fw_ver[2], rdev->dev_attr.fw_ver[3]);
++               rdev->dev_attr->fw_ver[0], rdev->dev_attr->fw_ver[1],
++               rdev->dev_attr->fw_ver[2], rdev->dev_attr->fw_ver[3]);
+ }
+ int bnxt_re_query_pkey(struct ib_device *ibdev, u32 port_num,
+@@ -585,7 +585,7 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
+       mr->qplib_mr.pd = &pd->qplib_pd;
+       mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+       mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags);
+-      if (!_is_alloc_mr_unified(rdev->dev_attr.dev_cap_flags)) {
++      if (!_is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags)) {
+               rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+               if (rc) {
+                       ibdev_err(&rdev->ibdev, "Failed to alloc fence-HW-MR\n");
+@@ -1057,7 +1057,7 @@ static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp,
+       rdev = qp->rdev;
+       qplqp = &qp->qplib_qp;
+       sq = &qplqp->sq;
+-      dev_attr = &rdev->dev_attr;
++      dev_attr = rdev->dev_attr;
+       align = sizeof(struct sq_send_hdr);
+       ilsize = ALIGN(init_attr->cap.max_inline_data, align);
+@@ -1277,7 +1277,7 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp,
+       rdev = qp->rdev;
+       qplqp = &qp->qplib_qp;
+       rq = &qplqp->rq;
+-      dev_attr = &rdev->dev_attr;
++      dev_attr = rdev->dev_attr;
+       if (init_attr->srq) {
+               struct bnxt_re_srq *srq;
+@@ -1314,7 +1314,7 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp)
+       rdev = qp->rdev;
+       qplqp = &qp->qplib_qp;
+-      dev_attr = &rdev->dev_attr;
++      dev_attr = rdev->dev_attr;
+       if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
+               qplqp->rq.max_sge = dev_attr->max_qp_sges;
+@@ -1340,7 +1340,7 @@ static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
+       rdev = qp->rdev;
+       qplqp = &qp->qplib_qp;
+       sq = &qplqp->sq;
+-      dev_attr = &rdev->dev_attr;
++      dev_attr = rdev->dev_attr;
+       sq->max_sge = init_attr->cap.max_send_sge;
+       entries = init_attr->cap.max_send_wr;
+@@ -1393,7 +1393,7 @@ static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp,
+       rdev = qp->rdev;
+       qplqp = &qp->qplib_qp;
+-      dev_attr = &rdev->dev_attr;
++      dev_attr = rdev->dev_attr;
+       if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
+               entries = bnxt_re_init_depth(init_attr->cap.max_send_wr + 1, uctx);
+@@ -1442,7 +1442,7 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
+       rdev = qp->rdev;
+       qplqp = &qp->qplib_qp;
+-      dev_attr = &rdev->dev_attr;
++      dev_attr = rdev->dev_attr;
+       /* Setup misc params */
+       ether_addr_copy(qplqp->smac, rdev->netdev->dev_addr);
+@@ -1612,7 +1612,7 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr,
+       ib_pd = ib_qp->pd;
+       pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+       rdev = pd->rdev;
+-      dev_attr = &rdev->dev_attr;
++      dev_attr = rdev->dev_attr;
+       qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+       uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
+@@ -1840,7 +1840,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
+       ib_pd = ib_srq->pd;
+       pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+       rdev = pd->rdev;
+-      dev_attr = &rdev->dev_attr;
++      dev_attr = rdev->dev_attr;
+       srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq);
+       if (srq_init_attr->attr.max_wr >= dev_attr->max_srq_wqes) {
+@@ -2044,7 +2044,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
+ {
+       struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+       struct bnxt_re_dev *rdev = qp->rdev;
+-      struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
++      struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
+       enum ib_qp_state curr_qp_state, new_qp_state;
+       int rc, entries;
+       unsigned int flags;
+@@ -3091,7 +3091,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+       struct ib_udata *udata = &attrs->driver_udata;
+       struct bnxt_re_ucontext *uctx =
+               rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
+-      struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
++      struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
+       struct bnxt_qplib_chip_ctx *cctx;
+       int cqe = attr->cqe;
+       int rc, entries;
+@@ -3226,7 +3226,7 @@ int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
+       cq =  container_of(ibcq, struct bnxt_re_cq, ib_cq);
+       rdev = cq->rdev;
+-      dev_attr = &rdev->dev_attr;
++      dev_attr = rdev->dev_attr;
+       if (!ibcq->uobject) {
+               ibdev_err(&rdev->ibdev, "Kernel CQ Resize not supported");
+               return -EOPNOTSUPP;
+@@ -4199,7 +4199,7 @@ static struct ib_mr *__bnxt_re_user_reg_mr(struct ib_pd *ib_pd, u64 length, u64
+       mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags);
+       mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR;
+-      if (!_is_alloc_mr_unified(rdev->dev_attr.dev_cap_flags)) {
++      if (!_is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags)) {
+               rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+               if (rc) {
+                       ibdev_err(&rdev->ibdev, "Failed to allocate MR rc = %d", rc);
+@@ -4291,7 +4291,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
+       struct bnxt_re_ucontext *uctx =
+               container_of(ctx, struct bnxt_re_ucontext, ib_uctx);
+       struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+-      struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
++      struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
+       struct bnxt_re_user_mmap_entry *entry;
+       struct bnxt_re_uctx_resp resp = {};
+       struct bnxt_re_uctx_req ureq = {};
+diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
+index 7368b0482bb87..b29687ec2ea31 100644
+--- a/drivers/infiniband/hw/bnxt_re/main.c
++++ b/drivers/infiniband/hw/bnxt_re/main.c
+@@ -153,6 +153,10 @@ static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
+       if (!rdev->chip_ctx)
+               return;
++
++      kfree(rdev->dev_attr);
++      rdev->dev_attr = NULL;
++
+       chip_ctx = rdev->chip_ctx;
+       rdev->chip_ctx = NULL;
+       rdev->rcfw.res = NULL;
+@@ -166,7 +170,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
+ {
+       struct bnxt_qplib_chip_ctx *chip_ctx;
+       struct bnxt_en_dev *en_dev;
+-      int rc;
++      int rc = -ENOMEM;
+       en_dev = rdev->en_dev;
+@@ -182,7 +186,10 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
+       rdev->qplib_res.cctx = rdev->chip_ctx;
+       rdev->rcfw.res = &rdev->qplib_res;
+-      rdev->qplib_res.dattr = &rdev->dev_attr;
++      rdev->dev_attr = kzalloc(sizeof(*rdev->dev_attr), GFP_KERNEL);
++      if (!rdev->dev_attr)
++              goto free_chip_ctx;
++      rdev->qplib_res.dattr = rdev->dev_attr;
+       rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev);
+       rdev->qplib_res.en_dev = en_dev;
+@@ -190,16 +197,20 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
+       bnxt_re_set_db_offset(rdev);
+       rc = bnxt_qplib_map_db_bar(&rdev->qplib_res);
+-      if (rc) {
+-              kfree(rdev->chip_ctx);
+-              rdev->chip_ctx = NULL;
+-              return rc;
+-      }
++      if (rc)
++              goto free_dev_attr;
+       if (bnxt_qplib_determine_atomics(en_dev->pdev))
+               ibdev_info(&rdev->ibdev,
+                          "platform doesn't support global atomics.");
+       return 0;
++free_dev_attr:
++      kfree(rdev->dev_attr);
++      rdev->dev_attr = NULL;
++free_chip_ctx:
++      kfree(rdev->chip_ctx);
++      rdev->chip_ctx = NULL;
++      return rc;
+ }
+ /* SR-IOV helper functions */
+@@ -221,7 +232,7 @@ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev)
+       struct bnxt_qplib_ctx *ctx;
+       int i;
+-      attr = &rdev->dev_attr;
++      attr = rdev->dev_attr;
+       ctx = &rdev->qplib_ctx;
+       ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT,
+@@ -235,7 +246,7 @@ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev)
+       if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+               for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
+                       rdev->qplib_ctx.tqm_ctx.qcount[i] =
+-                      rdev->dev_attr.tqm_alloc_reqs[i];
++                      rdev->dev_attr->tqm_alloc_reqs[i];
+ }
+ static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf)
+@@ -1631,12 +1642,11 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
+       /* Configure and allocate resources for qplib */
+       rdev->qplib_res.rcfw = &rdev->rcfw;
+-      rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
++      rc = bnxt_qplib_get_dev_attr(&rdev->rcfw);
+       if (rc)
+               goto fail;
+-      rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->en_dev->pdev,
+-                                rdev->netdev, &rdev->dev_attr);
++      rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->netdev);
+       if (rc)
+               goto fail;
+@@ -2036,7 +2046,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
+                       rdev->pacing.dbr_pacing = false;
+               }
+       }
+-      rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
++      rc = bnxt_qplib_get_dev_attr(&rdev->rcfw);
+       if (rc)
+               goto disable_rcfw;
+diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
+index 96ceec1e8199a..02922a0987ad7 100644
+--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
++++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
+@@ -876,14 +876,13 @@ void bnxt_qplib_free_res(struct bnxt_qplib_res *res)
+       bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl);
+ }
+-int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev,
+-                       struct net_device *netdev,
+-                       struct bnxt_qplib_dev_attr *dev_attr)
++int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev)
+ {
++      struct bnxt_qplib_dev_attr *dev_attr;
+       int rc;
+-      res->pdev = pdev;
+       res->netdev = netdev;
++      dev_attr = res->dattr;
+       rc = bnxt_qplib_alloc_sgid_tbl(res, &res->sgid_tbl, dev_attr->max_sgid);
+       if (rc)
+diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
+index cbfc49a1a56d7..be5d907a036b6 100644
+--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
++++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
+@@ -424,9 +424,7 @@ int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
+ void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res);
+ int bnxt_qplib_init_res(struct bnxt_qplib_res *res);
+ void bnxt_qplib_free_res(struct bnxt_qplib_res *res);
+-int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev,
+-                       struct net_device *netdev,
+-                       struct bnxt_qplib_dev_attr *dev_attr);
++int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev);
+ void bnxt_qplib_free_ctx(struct bnxt_qplib_res *res,
+                        struct bnxt_qplib_ctx *ctx);
+ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res,
+diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+index 9df3e3271577d..2e09616736bc7 100644
+--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
++++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+@@ -88,9 +88,9 @@ static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw,
+       fw_ver[3] = resp.fw_rsvd;
+ }
+-int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
+-                          struct bnxt_qplib_dev_attr *attr)
++int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw)
+ {
++      struct bnxt_qplib_dev_attr *attr = rcfw->res->dattr;
+       struct creq_query_func_resp resp = {};
+       struct bnxt_qplib_cmdqmsg msg = {};
+       struct creq_query_func_resp_sb *sb;
+diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+index e6beeb514b7dd..a1878eec7ba62 100644
+--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
++++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+@@ -325,8 +325,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+                          struct bnxt_qplib_gid *gid, u16 gid_idx,
+                          const u8 *smac);
+-int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
+-                          struct bnxt_qplib_dev_attr *attr);
++int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw);
+ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
+                                 struct bnxt_qplib_rcfw *rcfw,
+                                 struct bnxt_qplib_ctx *ctx);
+-- 
+2.39.5
+
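The reworked bnxt_re_setup_chip_ctx() follows the usual kernel goto-unwind pattern; a user-space approximation with calloc/free standing in for kzalloc/kfree:

    #include <stdio.h>
    #include <stdlib.h>

    struct chip_ctx { int id; };
    struct dev_attr { int max_qp; };

    /* each failure label frees exactly what the earlier steps allocated */
    static int setup(struct chip_ctx **ctx_out, struct dev_attr **attr_out)
    {
            struct chip_ctx *ctx;
            struct dev_attr *attr;
            int rc = -1;

            ctx = calloc(1, sizeof(*ctx));
            if (!ctx)
                    return rc;

            attr = calloc(1, sizeof(*attr));
            if (!attr)
                    goto free_ctx;

            /* later steps would unwind through a free_attr label */
            *ctx_out = ctx;
            *attr_out = attr;
            return 0;

    free_ctx:
            free(ctx);
            return rc;
    }

    int main(void)
    {
            struct chip_ctx *ctx = NULL;
            struct dev_attr *attr = NULL;

            printf("setup: %d\n", setup(&ctx, &attr));
            free(attr);
            free(ctx);
            return 0;
    }
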
diff --git a/queue-6.13/rdma-bnxt_re-fix-the-page-details-for-the-srq-create.patch b/queue-6.13/rdma-bnxt_re-fix-the-page-details-for-the-srq-create.patch
new file mode 100644 (file)
index 0000000..a836a46
--- /dev/null
@@ -0,0 +1,65 @@
+From 41cc26394ff51c0f5deb6f0a2fb4b840094ad1aa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 22 Feb 2025 07:20:21 -0800
+Subject: RDMA/bnxt_re: Fix the page details for the srq created by kernel
+ consumers
+
+From: Kashyap Desai <kashyap.desai@broadcom.com>
+
+[ Upstream commit b66535356a4834a234f99e16a97eb51f2c6c5a7d ]
+
+While using the nvme target with use_srq on, the below kernel panic is noticed.
+
+[  549.698111] bnxt_en 0000:41:00.0 enp65s0np0: FEC autoneg off encoding: Clause 91 RS(544,514)
+[  566.393619] Oops: divide error: 0000 [#1] PREEMPT SMP NOPTI
+..
+[  566.393799]  <TASK>
+[  566.393807]  ? __die_body+0x1a/0x60
+[  566.393823]  ? die+0x38/0x60
+[  566.393835]  ? do_trap+0xe4/0x110
+[  566.393847]  ? bnxt_qplib_alloc_init_hwq+0x1d4/0x580 [bnxt_re]
+[  566.393867]  ? bnxt_qplib_alloc_init_hwq+0x1d4/0x580 [bnxt_re]
+[  566.393881]  ? do_error_trap+0x7c/0x120
+[  566.393890]  ? bnxt_qplib_alloc_init_hwq+0x1d4/0x580 [bnxt_re]
+[  566.393911]  ? exc_divide_error+0x34/0x50
+[  566.393923]  ? bnxt_qplib_alloc_init_hwq+0x1d4/0x580 [bnxt_re]
+[  566.393939]  ? asm_exc_divide_error+0x16/0x20
+[  566.393966]  ? bnxt_qplib_alloc_init_hwq+0x1d4/0x580 [bnxt_re]
+[  566.393997]  bnxt_qplib_create_srq+0xc9/0x340 [bnxt_re]
+[  566.394040]  bnxt_re_create_srq+0x335/0x3b0 [bnxt_re]
+[  566.394057]  ? srso_return_thunk+0x5/0x5f
+[  566.394068]  ? __init_swait_queue_head+0x4a/0x60
+[  566.394090]  ib_create_srq_user+0xa7/0x150 [ib_core]
+[  566.394147]  nvmet_rdma_queue_connect+0x7d0/0xbe0 [nvmet_rdma]
+[  566.394174]  ? lock_release+0x22c/0x3f0
+[  566.394187]  ? srso_return_thunk+0x5/0x5f
+
+Page size and shift info is set only for user-space SRQs.
+Set the page size and page shift for kernel-space SRQs as well.
+
+Fixes: 0c4dcd602817 ("RDMA/bnxt_re: Refactor hardware queue memory allocation")
+Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
+Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
+Link: https://patch.msgid.link/1740237621-29291-1-git-send-email-selvin.xavier@broadcom.com
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+index 4b61867188c4c..0ed62d3e494c0 100644
+--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
++++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+@@ -1872,6 +1872,8 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
+       srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit;
+       srq->srq_limit = srq_init_attr->attr.srq_limit;
+       srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id;
++      srq->qplib_srq.sg_info.pgsize = PAGE_SIZE;
++      srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT;
+       nq = &rdev->nqr->nq[0];
+       if (udata) {
+-- 
+2.39.5
+
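The panic is an integer division by a pgsize that was never set for kernel consumers; a minimal reproduction of the arithmetic (sizes are illustrative):

    #include <stdio.h>

    static int npages(unsigned int ring_bytes, unsigned int pgsize)
    {
            if (pgsize == 0)   /* without the fix: divide error */
                    return -1;
            return (ring_bytes + pgsize - 1) / pgsize;
    }

    int main(void)
    {
            printf("unset pgsize: %d\n", npages(65536, 0));
            printf("fixed pgsize: %d\n", npages(65536, 4096));  /* 16 */
            return 0;
    }
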
diff --git a/queue-6.13/rdma-bnxt_re-fix-the-statistics-for-gen-p7-vf.patch b/queue-6.13/rdma-bnxt_re-fix-the-statistics-for-gen-p7-vf.patch
new file mode 100644 (file)
index 0000000..24b4f1c
--- /dev/null
@@ -0,0 +1,60 @@
+From 9d1cf1fa588436db6358240ec7e541d7cad8c57e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Feb 2025 00:21:25 -0800
+Subject: RDMA/bnxt_re: Fix the statistics for Gen P7 VF
+
+From: Selvin Xavier <selvin.xavier@broadcom.com>
+
+[ Upstream commit 8238c7bd84209c8216b1381ab0dbe6db9e203769 ]
+
+Gen P7 VFs support the extended stats but are prevented from
+using them by a VF check. Fix the check to issue the FW command
+for Gen P7 VFs as well.
+
+Fixes: 1801d87b3598 ("RDMA/bnxt_re: Support new 5760X P7 devices")
+Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
+Link: https://patch.msgid.link/1738657285-23968-5-git-send-email-selvin.xavier@broadcom.com
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/bnxt_re/hw_counters.c | 4 ++--
+ drivers/infiniband/hw/bnxt_re/qplib_res.h   | 8 ++++++++
+ 2 files changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
+index 656c150e38e6f..f51adb0a97e66 100644
+--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
++++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
+@@ -357,8 +357,8 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
+                       goto done;
+               }
+               bnxt_re_copy_err_stats(rdev, stats, err_s);
+-              if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags) &&
+-                  !rdev->is_virtfn) {
++              if (bnxt_ext_stats_supported(rdev->chip_ctx, rdev->dev_attr->dev_cap_flags,
++                                           rdev->is_virtfn)) {
+                       rc = bnxt_re_get_ext_stat(rdev, stats);
+                       if (rc) {
+                               clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS,
+diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
+index be5d907a036b6..711990232de1c 100644
+--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
++++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
+@@ -547,6 +547,14 @@ static inline bool _is_ext_stats_supported(u16 dev_cap_flags)
+               CREQ_QUERY_FUNC_RESP_SB_EXT_STATS;
+ }
++static inline int bnxt_ext_stats_supported(struct bnxt_qplib_chip_ctx *ctx,
++                                         u16 flags, bool virtfn)
++{
++      /* ext stats supported if cap flag is set AND is a PF OR a Thor2 VF */
++      return (_is_ext_stats_supported(flags) &&
++              ((virtfn && bnxt_qplib_is_chip_gen_p7(ctx)) || (!virtfn)));
++}
++
+ static inline bool _is_hw_retx_supported(u16 dev_cap_flags)
+ {
+       return dev_cap_flags &
+-- 
+2.39.5
+
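The new predicate reads as a small truth function: the capability flag must be set, and the caller must be a PF or a Gen P7 VF. A direct translation:

    #include <stdbool.h>
    #include <stdio.h>

    static bool ext_stats_supported(bool cap_flag, bool is_vf, bool is_gen_p7)
    {
            return cap_flag && (!is_vf || is_gen_p7);
    }

    int main(void)
    {
            printf("Gen P7 VF: %d\n", ext_stats_supported(true, true, true));
            printf("older VF : %d\n", ext_stats_supported(true, true, false));
            printf("any PF   : %d\n", ext_stats_supported(true, false, false));
            return 0;
    }
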
diff --git a/queue-6.13/rdma-hns-fix-mbox-timing-out-by-adding-retry-mechani.patch b/queue-6.13/rdma-hns-fix-mbox-timing-out-by-adding-retry-mechani.patch
new file mode 100644 (file)
index 0000000..5e7fcbd
--- /dev/null
@@ -0,0 +1,167 @@
+From 1e6f4df5c52a78015f0cf17051c34572da8ccfc7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 8 Feb 2025 18:59:30 +0800
+Subject: RDMA/hns: Fix mbox timing out by adding retry mechanism
+
+From: Junxian Huang <huangjunxian6@hisilicon.com>
+
+[ Upstream commit 9747c0c7791d4a5a62018a0c9c563dd2e6f6c1c0 ]
+
+If a QP is modified to error state and a flush CQE process is triggered,
+the subsequent QP destruction mbox can still be successfully posted but
+will be blocked in HW until the flush CQE process finishes. This causes
+further mbox posting timeouts in driver. The blocking time is related
+to QP depth. Considering an extreme case where SQ depth and RQ depth
+are both 32K, the blocking time can reach about 135ms.
+
+This patch adds a retry mechanism for mbox posting. For each try, FW
+waits 15ms for HW to complete the previous mbox, otherwise it returns a
+timeout error code to the driver. Accounting for other time consumed in
+FW, set 8 tries for mbox posting with a 5ms gap before each retry to
+reach a sufficient overall timeout limit.
+
+Fixes: 0425e3e6e0c7 ("RDMA/hns: Support flush cqe for hip08 in kernel space")
+Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
+Link: https://patch.msgid.link/20250208105930.522796-1-huangjunxian6@hisilicon.com
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 64 ++++++++++++++++------
+ drivers/infiniband/hw/hns/hns_roce_hw_v2.h |  2 +
+ 2 files changed, 50 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+index 0144e7210d05a..f5c3e560df58d 100644
+--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
++++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+@@ -1286,10 +1286,8 @@ static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout)
+       return tx_timeout;
+ }
+-static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode)
++static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u32 tx_timeout)
+ {
+-      struct hns_roce_v2_priv *priv = hr_dev->priv;
+-      u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout);
+       u32 timeout = 0;
+       do {
+@@ -1299,8 +1297,9 @@ static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode)
+       } while (++timeout < tx_timeout);
+ }
+-static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
+-                             struct hns_roce_cmq_desc *desc, int num)
++static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev,
++                                 struct hns_roce_cmq_desc *desc,
++                                 int num, u32 tx_timeout)
+ {
+       struct hns_roce_v2_priv *priv = hr_dev->priv;
+       struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
+@@ -1309,8 +1308,6 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
+       int ret;
+       int i;
+-      spin_lock_bh(&csq->lock);
+-
+       tail = csq->head;
+       for (i = 0; i < num; i++) {
+@@ -1324,22 +1321,17 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
+       atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]);
+-      hns_roce_wait_csq_done(hr_dev, le16_to_cpu(desc->opcode));
++      hns_roce_wait_csq_done(hr_dev, tx_timeout);
+       if (hns_roce_cmq_csq_done(hr_dev)) {
+               ret = 0;
+               for (i = 0; i < num; i++) {
+                       /* check the result of hardware write back */
+-                      desc[i] = csq->desc[tail++];
++                      desc_ret = le16_to_cpu(csq->desc[tail++].retval);
+                       if (tail == csq->desc_num)
+                               tail = 0;
+-
+-                      desc_ret = le16_to_cpu(desc[i].retval);
+                       if (likely(desc_ret == CMD_EXEC_SUCCESS))
+                               continue;
+-                      dev_err_ratelimited(hr_dev->dev,
+-                                          "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n",
+-                                          desc->opcode, desc_ret);
+                       ret = hns_roce_cmd_err_convert_errno(desc_ret);
+               }
+       } else {
+@@ -1354,14 +1346,54 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
+               ret = -EAGAIN;
+       }
+-      spin_unlock_bh(&csq->lock);
+-
+       if (ret)
+               atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]);
+       return ret;
+ }
++static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
++                             struct hns_roce_cmq_desc *desc, int num)
++{
++      struct hns_roce_v2_priv *priv = hr_dev->priv;
++      struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
++      u16 opcode = le16_to_cpu(desc->opcode);
++      u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout);
++      u8 try_cnt = HNS_ROCE_OPC_POST_MB_TRY_CNT;
++      u32 rsv_tail;
++      int ret;
++      int i;
++
++      while (try_cnt) {
++              try_cnt--;
++
++              spin_lock_bh(&csq->lock);
++              rsv_tail = csq->head;
++              ret = __hns_roce_cmq_send_one(hr_dev, desc, num, tx_timeout);
++              if (opcode == HNS_ROCE_OPC_POST_MB && ret == -ETIME &&
++                  try_cnt) {
++                      spin_unlock_bh(&csq->lock);
++                      mdelay(HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC);
++                      continue;
++              }
++
++              for (i = 0; i < num; i++) {
++                      desc[i] = csq->desc[rsv_tail++];
++                      if (rsv_tail == csq->desc_num)
++                              rsv_tail = 0;
++              }
++              spin_unlock_bh(&csq->lock);
++              break;
++      }
++
++      if (ret)
++              dev_err_ratelimited(hr_dev->dev,
++                                  "Cmdq IO error, opcode = 0x%x, return = %d.\n",
++                                  opcode, ret);
++
++      return ret;
++}
++
+ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
+                            struct hns_roce_cmq_desc *desc, int num)
+ {
+diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+index cbdbc9edbce6e..91a5665465ffb 100644
+--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
++++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+@@ -230,6 +230,8 @@ enum hns_roce_opcode_type {
+ };
+ #define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000
++#define HNS_ROCE_OPC_POST_MB_TRY_CNT 8
++#define HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC 5
+ struct hns_roce_cmdq_tx_timeout_map {
+       u16 opcode;
+       u32 tx_timeout;
+-- 
+2.39.5
+
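The retry loop added in __hns_roce_cmq_send() boils down to a bounded retry with a small gap, repeated only for the timeout case; a user-space sketch with -62 standing in for -ETIME and send_once() as a stub:

    #include <stdio.h>
    #include <unistd.h>

    #define TRY_CNT       8
    #define RETRY_GAP_MS  5

    static int send_once(int attempt)
    {
            return attempt < 2 ? -62 : 0;   /* time out twice, then succeed */
    }

    int main(void)
    {
            int try_cnt = TRY_CNT, attempt = 0, ret;

            while (try_cnt--) {
                    ret = send_once(attempt++);
                    if (ret == -62 && try_cnt) {
                            usleep(RETRY_GAP_MS * 1000);  /* let FW drain */
                            continue;
                    }
                    break;
            }
            printf("ret=%d after %d attempt(s)\n", ret, attempt);
            return 0;
    }
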
diff --git a/queue-6.13/rdma-mana_ib-allocate-page-aligned-doorbell-index.patch b/queue-6.13/rdma-mana_ib-allocate-page-aligned-doorbell-index.patch
new file mode 100644 (file)
index 0000000..4715b39
--- /dev/null
@@ -0,0 +1,39 @@
+From f859f7ea531c50739d0eb570f99c747b89b6909c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Feb 2025 02:30:05 -0800
+Subject: RDMA/mana_ib: Allocate PAGE aligned doorbell index
+
+From: Konstantin Taranov <kotaranov@microsoft.com>
+
+[ Upstream commit 29b7bb98234cc287cebef9bccf638c2e3f39be71 ]
+
+Allocate a PAGE-aligned doorbell index to ensure each process gets a
+separate PAGE-sized doorbell area remapped to it in mana_ib_mmap.
+
+Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter")
+Signed-off-by: Shiraz Saleem <shirazsaleem@microsoft.com>
+Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
+Link: https://patch.msgid.link/1738751405-15041-1-git-send-email-kotaranov@linux.microsoft.com
+Reviewed-by: Long Li <longli@microsoft.com>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mana/main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
+index 67c2d43135a8a..457cea6d99095 100644
+--- a/drivers/infiniband/hw/mana/main.c
++++ b/drivers/infiniband/hw/mana/main.c
+@@ -174,7 +174,7 @@ static int mana_gd_allocate_doorbell_page(struct gdma_context *gc,
+       req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
+       req.num_resources = 1;
+-      req.alignment = 1;
++      req.alignment = PAGE_SIZE / MANA_PAGE_SIZE;
+       /* Have GDMA start searching from 0 */
+       req.allocated_resources = 0;
+-- 
+2.39.5
+
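The alignment request only matters when the host page is larger than a MANA page; a small worked example assuming 64K host pages (with 4K/4K the computed alignment is simply 1):

    #include <stdio.h>

    #define HOST_PAGE  65536u   /* PAGE_SIZE on a 64K-page kernel */
    #define MANA_PAGE   4096u   /* MANA_PAGE_SIZE */

    static unsigned int align_up(unsigned int v, unsigned int a)
    {
            return (v + a - 1) / a * a;
    }

    int main(void)
    {
            unsigned int alignment = HOST_PAGE / MANA_PAGE;   /* 16 */

            /* a hypothetical allocator cursor at index 1 gets rounded up
             * so the doorbell area starts on a host-page boundary */
            printf("aligned index = %u\n", align_up(1, alignment));
            return 0;
    }
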
diff --git a/queue-6.13/rdma-mlx5-fix-a-race-for-dmabuf-mr-which-can-lead-to.patch b/queue-6.13/rdma-mlx5-fix-a-race-for-dmabuf-mr-which-can-lead-to.patch
new file mode 100644 (file)
index 0000000..16f18b2
--- /dev/null
@@ -0,0 +1,89 @@
+From 37813bd2c0e55506b9586643f95b861ad0cd19cd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 14:50:59 +0200
+Subject: RDMA/mlx5: Fix a race for DMABUF MR which can lead to CQE with error
+
+From: Yishai Hadas <yishaih@nvidia.com>
+
+[ Upstream commit cc668a11e6ac8adb0e016711080d3f314722cc91 ]
+
+This patch addresses a potential race condition for a DMABUF MR that can
+result in a CQE with an error on the UMR QP.
+
+During the __mlx5_ib_dereg_mr() flow, the following sequence of calls
+occurs:
+mlx5_revoke_mr()
+mlx5r_umr_revoke_mr()
+mlx5r_umr_post_send_wait()
+At this point, the lkey is freed from the hardware's perspective.
+
+However, concurrently, mlx5_ib_dmabuf_invalidate_cb() might be triggered
+by another task attempting to invalidate the MR having that freed lkey.
+
+Since the lkey has already been freed, this can lead to a CQE error,
+causing the UMR QP to enter an error state.
+
+To resolve this race condition, the dma_resv_lock() which was held as
+part of mlx5_ib_dmabuf_invalidate_cb() is now also acquired as part
+of the mlx5_revoke_mr() scope.
+
+Upon a successful revoke, we set umem_dmabuf->private, which points to
+that MR, to NULL, preventing any further invalidation attempts on its
+lkey.
+
+Fixes: e6fb246ccafb ("RDMA/mlx5: Consolidate MR destruction to mlx5_ib_dereg_mr()")
+Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
+Reviewed-by: Artemy Kovalyov <artemyko@mnvidia.com>
+Link: https://patch.msgid.link/70617067abbfaa0c816a2544c922e7f4346def58.1738587016.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mlx5/mr.c | 13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
+index bb02b6adbf2c2..0a3cbb14e1839 100644
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -1550,7 +1550,7 @@ static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
+       dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+-      if (!umem_dmabuf->sgt)
++      if (!umem_dmabuf->sgt || !mr)
+               return;
+       mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
+@@ -2022,11 +2022,16 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
+       struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+       struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
+       bool is_odp = is_odp_mr(mr);
++      bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
++                      !to_ib_umem_dmabuf(mr->umem)->pinned;
+       int ret = 0;
+       if (is_odp)
+               mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
++      if (is_odp_dma_buf)
++              dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL);
++
+       if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) {
+               ent = mr->mmkey.cache_ent;
+               /* upon storing to a clean temp entry - schedule its cleanup */
+@@ -2054,6 +2059,12 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
+               mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+       }
++      if (is_odp_dma_buf) {
++              if (!ret)
++                      to_ib_umem_dmabuf(mr->umem)->private = NULL;
++              dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
++      }
++
+       return ret;
+ }
+-- 
+2.39.5
+
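The shape of the fix, reduced to a userspace sketch: the invalidate callback and the revoke path take the same lock, revoke clears the back-pointer once the lkey is gone, and the callback bails out when it finds the pointer NULL. All names are illustrative stand-ins, not the driver's.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct mr { bool lkey_valid; };

static pthread_mutex_t resv_lock = PTHREAD_MUTEX_INITIALIZER;
static struct mr *dmabuf_private;     /* plays umem_dmabuf->private */

static void invalidate_cb(void)
{
    pthread_mutex_lock(&resv_lock);
    if (dmabuf_private)               /* skip MRs already revoked */
        printf("invalidating lkey\n");
    pthread_mutex_unlock(&resv_lock);
}

static void revoke_mr(struct mr *m)
{
    pthread_mutex_lock(&resv_lock);
    m->lkey_valid = false;            /* lkey freed from HW perspective */
    dmabuf_private = NULL;            /* no further invalidation attempts */
    pthread_mutex_unlock(&resv_lock);
}

int main(void)
{
    struct mr m = { .lkey_valid = true };

    dmabuf_private = &m;
    revoke_mr(&m);
    invalidate_cb();                  /* now a harmless no-op */
    return 0;
}
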
diff --git a/queue-6.13/rdma-mlx5-fix-a-warn-during-dereg_mr-for-dm-type.patch b/queue-6.13/rdma-mlx5-fix-a-warn-during-dereg_mr-for-dm-type.patch
new file mode 100644 (file)
index 0000000..c50ead1
--- /dev/null
@@ -0,0 +1,106 @@
+From 4ceca425be2796ddc278701cd62f03251d43d39c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 14:51:43 +0200
+Subject: RDMA/mlx5: Fix a WARN during dereg_mr for DM type
+
+From: Yishai Hadas <yishaih@nvidia.com>
+
+[ Upstream commit abc7b3f1f056d69a8f11d6dceecc0c9549ace770 ]
+
+Memory regions (MR) of type DM (device memory) do not have an associated
+umem.
+
+In the __mlx5_ib_dereg_mr() -> mlx5_free_priv_descs() flow, the code
+incorrectly takes the wrong branch, attempting to call
+dma_unmap_single() on a DMA address that is not mapped.
+
+This results in a WARN [1], as shown below.
+
+The issue is resolved by properly accounting for the DM type and
+ensuring the correct branch is selected in mlx5_free_priv_descs().
+
+[1]
+WARNING: CPU: 12 PID: 1346 at drivers/iommu/dma-iommu.c:1230 iommu_dma_unmap_page+0x79/0x90
+Modules linked in: ip6table_mangle ip6table_nat ip6table_filter ip6_tables iptable_mangle xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry ovelay rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core fuse mlx5_core
+CPU: 12 UID: 0 PID: 1346 Comm: ibv_rc_pingpong Not tainted 6.12.0-rc7+ #1631
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+RIP: 0010:iommu_dma_unmap_page+0x79/0x90
+Code: 2b 49 3b 29 72 26 49 3b 69 08 73 20 4d 89 f0 44 89 e9 4c 89 e2 48 89 ee 48 89 df 5b 5d 41 5c 41 5d 41 5e 41 5f e9 07 b8 88 ff <0f> 0b 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc 66 0f 1f 44 00
+RSP: 0018:ffffc90001913a10 EFLAGS: 00010246
+RAX: 0000000000000000 RBX: ffff88810194b0a8 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000001
+RBP: ffff88810194b0a8 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000
+R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000
+FS:  00007f537abdd740(0000) GS:ffff88885fb00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f537aeb8000 CR3: 000000010c248001 CR4: 0000000000372eb0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+<TASK>
+? __warn+0x84/0x190
+? iommu_dma_unmap_page+0x79/0x90
+? report_bug+0xf8/0x1c0
+? handle_bug+0x55/0x90
+? exc_invalid_op+0x13/0x60
+? asm_exc_invalid_op+0x16/0x20
+? iommu_dma_unmap_page+0x79/0x90
+dma_unmap_page_attrs+0xe6/0x290
+mlx5_free_priv_descs+0xb0/0xe0 [mlx5_ib]
+__mlx5_ib_dereg_mr+0x37e/0x520 [mlx5_ib]
+? _raw_spin_unlock_irq+0x24/0x40
+? wait_for_completion+0xfe/0x130
+? rdma_restrack_put+0x63/0xe0 [ib_core]
+ib_dereg_mr_user+0x5f/0x120 [ib_core]
+? lock_release+0xc6/0x280
+destroy_hw_idr_uobject+0x1d/0x60 [ib_uverbs]
+uverbs_destroy_uobject+0x58/0x1d0 [ib_uverbs]
+uobj_destroy+0x3f/0x70 [ib_uverbs]
+ib_uverbs_cmd_verbs+0x3e4/0xbb0 [ib_uverbs]
+? __pfx_uverbs_destroy_def_handler+0x10/0x10 [ib_uverbs]
+? lock_acquire+0xc1/0x2f0
+? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs]
+? ib_uverbs_ioctl+0x116/0x170 [ib_uverbs]
+? lock_release+0xc6/0x280
+ib_uverbs_ioctl+0xe7/0x170 [ib_uverbs]
+? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs]
+__x64_sys_ioctl+0x1b0/0xa70
+do_syscall_64+0x6b/0x140
+entry_SYSCALL_64_after_hwframe+0x76/0x7e
+RIP: 0033:0x7f537adaf17b
+Code: 0f 1e fa 48 8b 05 1d ad 0c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ed ac 0c 00 f7 d8 64 89 01 48
+RSP: 002b:00007ffff218f0b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+RAX: ffffffffffffffda RBX: 00007ffff218f1d8 RCX: 00007f537adaf17b
+RDX: 00007ffff218f1c0 RSI: 00000000c0181b01 RDI: 0000000000000003
+RBP: 00007ffff218f1a0 R08: 00007f537aa8d010 R09: 0000561ee2e4f270
+R10: 00007f537aace3a8 R11: 0000000000000246 R12: 00007ffff218f190
+R13: 000000000000001c R14: 0000561ee2e4d7c0 R15: 00007ffff218f450
+</TASK>
+
+Fixes: f18ec4223117 ("RDMA/mlx5: Use a union inside mlx5_ib_mr")
+Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
+Link: https://patch.msgid.link/2039c22cfc3df02378747ba4d623a558b53fc263.1738587076.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mlx5/mr.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
+index 0a3cbb14e1839..753faa9ad06a8 100644
+--- a/drivers/infiniband/hw/mlx5/mr.c
++++ b/drivers/infiniband/hw/mlx5/mr.c
+@@ -1935,7 +1935,8 @@ mlx5_alloc_priv_descs(struct ib_device *device,
+ static void
+ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
+ {
+-      if (!mr->umem && !mr->data_direct && mr->descs) {
++      if (!mr->umem && !mr->data_direct &&
++          mr->ibmr.type != IB_MR_TYPE_DM && mr->descs) {
+               struct ib_device *device = mr->ibmr.device;
+               int size = mr->max_descs * mr->desc_size;
+               struct mlx5_ib_dev *dev = to_mdev(device);
+-- 
+2.39.5
+
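A compact model of the added guard: teardown unmaps descriptor memory only for MR types that actually mapped it, so a DM MR (no umem, nothing DMA-mapped) can never reach the unmap branch. Types and fields below are simplified stand-ins.

#include <stdbool.h>
#include <stdio.h>

enum mr_type { MR_TYPE_MEM, MR_TYPE_DM };

struct mr {
    enum mr_type type;
    void *umem;          /* NULL for DM MRs */
    void *descs;         /* private descriptors, possibly DMA-mapped */
    bool  data_direct;
};

static void free_priv_descs(struct mr *mr)
{
    /* Mirror of the fixed condition: DM MRs must not hit dma_unmap. */
    if (!mr->umem && !mr->data_direct &&
        mr->type != MR_TYPE_DM && mr->descs)
        printf("dma_unmap + free descriptors\n");
    else
        printf("nothing to unmap\n");
}

int main(void)
{
    struct mr dm = { .type = MR_TYPE_DM, .descs = (void *)1 };

    free_priv_descs(&dm);   /* prints "nothing to unmap" */
    return 0;
}
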
diff --git a/queue-6.13/rdma-mlx5-fix-ah-static-rate-parsing.patch b/queue-6.13/rdma-mlx5-fix-ah-static-rate-parsing.patch
new file mode 100644 (file)
index 0000000..1d4a35e
--- /dev/null
@@ -0,0 +1,84 @@
+From 7a4c9bbda7158de15c7b7b12e6501d94c1150485 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Feb 2025 13:32:39 +0200
+Subject: RDMA/mlx5: Fix AH static rate parsing
+
+From: Patrisious Haddad <phaddad@nvidia.com>
+
+[ Upstream commit c534ffda781f44a1c6ac25ef6e0e444da38ca8af ]
+
+Previously the static rate wasn't translated according to our PRM but
+simply used the lower 4 bits.
+
+Correctly translate the static rate value passed in the AH creation
+attribute according to our PRM expected values.
+
+In addition, change the 800 Gbps mapping to zero, which is the
+PRM-specified value.
+
+Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
+Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
+Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
+Link: https://patch.msgid.link/18ef4cc5396caf80728341eb74738cd777596f60.1739187089.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mlx5/ah.c | 3 ++-
+ drivers/infiniband/hw/mlx5/qp.c | 6 +++---
+ drivers/infiniband/hw/mlx5/qp.h | 1 +
+ 3 files changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
+index 505bc47fd575d..99036afb3aef0 100644
+--- a/drivers/infiniband/hw/mlx5/ah.c
++++ b/drivers/infiniband/hw/mlx5/ah.c
+@@ -67,7 +67,8 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
+               ah->av.tclass = grh->traffic_class;
+       }
+-      ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);
++      ah->av.stat_rate_sl =
++              (mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr)) << 4);
+       if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+               if (init_attr->xmit_slave)
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index 08d22db8dca91..88724d15705d4 100644
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -3447,11 +3447,11 @@ static int ib_to_mlx5_rate_map(u8 rate)
+       return 0;
+ }
+-static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
++int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate)
+ {
+       u32 stat_rate_support;
+-      if (rate == IB_RATE_PORT_CURRENT)
++      if (rate == IB_RATE_PORT_CURRENT || rate == IB_RATE_800_GBPS)
+               return 0;
+       if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_800_GBPS)
+@@ -3596,7 +3596,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+                      sizeof(grh->dgid.raw));
+       }
+-      err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah));
++      err = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah));
+       if (err < 0)
+               return err;
+       MLX5_SET(ads, path, stat_rate, err);
+diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h
+index b6ee7c3ee1ca1..2530e7730635f 100644
+--- a/drivers/infiniband/hw/mlx5/qp.h
++++ b/drivers/infiniband/hw/mlx5/qp.h
+@@ -56,4 +56,5 @@ int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn);
+ int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
+ int mlx5_ib_qp_event_init(void);
+ void mlx5_ib_qp_event_cleanup(void);
++int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate);
+ #endif /* _MLX5_IB_QP_H */
+-- 
+2.39.5
+
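A sketch of the translation idea: the stat_rate field is only 4 bits wide, so the IB rate token must be mapped to a PRM code rather than stored raw, with PORT_CURRENT and 800 Gbps both mapping to zero. The enum values and PRM codes below are placeholders, not the real verbs or PRM numbers.

#include <stdio.h>

/* Placeholder rate tokens; real code uses enum ib_rate from verbs. */
enum rate { RATE_PORT_CURRENT, RATE_2_5_GBPS, RATE_100_GBPS, RATE_800_GBPS };

/* Hypothetical IB-rate -> 4-bit PRM translation. */
static int rate_to_prm(enum rate r)
{
    switch (r) {
    case RATE_PORT_CURRENT:
    case RATE_800_GBPS:
        return 0;            /* PRM-specified: use current port rate */
    case RATE_2_5_GBPS:
        return 8;            /* placeholder PRM code */
    case RATE_100_GBPS:
        return 12;           /* placeholder PRM code */
    default:
        return -1;           /* unsupported -> -EINVAL in the driver */
    }
}

int main(void)
{
    /* The field packs the rate in the high nibble, SL in the low one. */
    unsigned char sl = 3;
    int prm = rate_to_prm(RATE_800_GBPS);

    if (prm >= 0)
        printf("stat_rate_sl = 0x%02x\n", (prm << 4) | sl);
    return 0;
}
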
diff --git a/queue-6.13/rdma-mlx5-fix-bind-qp-error-cleanup-flow.patch b/queue-6.13/rdma-mlx5-fix-bind-qp-error-cleanup-flow.patch
new file mode 100644 (file)
index 0000000..121ffb8
--- /dev/null
@@ -0,0 +1,63 @@
+From de0a37ee8796c9b61beeeaae3c027b374f9cd867 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Feb 2025 08:47:10 +0200
+Subject: RDMA/mlx5: Fix bind QP error cleanup flow
+
+From: Patrisious Haddad <phaddad@nvidia.com>
+
+[ Upstream commit e1a0bdbdfdf08428f0ede5ae49c7f4139ac73ef5 ]
+
+When there is a failure during bind QP, the cleanup flow destroys the
+counter regardless of whether this call created it, which is
+problematic: if the counter was created elsewhere, it could still be
+in use.
+
+Fix that by destroying the counter only if it was created during this call.
+
+Fixes: 45842fc627c7 ("IB/mlx5: Support statistic q counter configuration")
+Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
+Reviewed-by: Mark Zhang <markzhang@nvidia.com>
+Link: https://patch.msgid.link/25dfefddb0ebefa668c32e06a94d84e3216257cf.1740033937.git.leon@kernel.org
+Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mlx5/counters.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
+index 4f6c1968a2ee3..81cfa74147a18 100644
+--- a/drivers/infiniband/hw/mlx5/counters.c
++++ b/drivers/infiniband/hw/mlx5/counters.c
+@@ -546,6 +546,7 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
+                                  struct ib_qp *qp)
+ {
+       struct mlx5_ib_dev *dev = to_mdev(qp->device);
++      bool new = false;
+       int err;
+       if (!counter->id) {
+@@ -560,6 +561,7 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
+                       return err;
+               counter->id =
+                       MLX5_GET(alloc_q_counter_out, out, counter_set_id);
++              new = true;
+       }
+       err = mlx5_ib_qp_set_counter(qp, counter);
+@@ -569,8 +571,10 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
+       return 0;
+ fail_set_counter:
+-      mlx5_ib_counter_dealloc(counter);
+-      counter->id = 0;
++      if (new) {
++              mlx5_ib_counter_dealloc(counter);
++              counter->id = 0;
++      }
+       return err;
+ }
+-- 
+2.39.5
+
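The cleanup rule the fix enforces, as a standalone sketch: the error path deallocates the counter only when this call allocated it, and leaves a caller-provided counter untouched. Names are illustrative.

#include <stdbool.h>
#include <stdio.h>

struct counter { int id; };

static int set_counter_fails(void) { return -1; } /* force the error path */

static int bind_qp(struct counter *c)
{
    bool new = false;
    int err;

    if (!c->id) {
        c->id = 42;        /* allocate a fresh HW counter */
        new = true;
    }

    err = set_counter_fails();
    if (!err)
        return 0;

    if (new) {             /* only roll back what we created here */
        c->id = 0;
        printf("deallocated our counter\n");
    } else {
        printf("left pre-existing counter %d alone\n", c->id);
    }
    return err;
}

int main(void)
{
    struct counter shared = { .id = 7 };

    bind_qp(&shared);      /* counter 7 survives the failed bind */
    return 0;
}
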
diff --git a/queue-6.13/rdma-mlx5-fix-implicit-odp-hang-on-parent-deregistra.patch b/queue-6.13/rdma-mlx5-fix-implicit-odp-hang-on-parent-deregistra.patch
new file mode 100644 (file)
index 0000000..1750076
--- /dev/null
@@ -0,0 +1,84 @@
+From f40756a4b8a9fd9fe636fb81d9f2ce027d7f25bb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Feb 2025 13:31:11 +0200
+Subject: RDMA/mlx5: Fix implicit ODP hang on parent deregistration
+
+From: Yishai Hadas <yishaih@nvidia.com>
+
+[ Upstream commit 3d8c6f26893d55fab218ad086719de1fc9bb86ba ]
+
+Fix destroy_unused_implicit_child_mr() to prevent a hang during
+parent deregistration, as shown below [1].
+
+Upon entering destroy_unused_implicit_child_mr(), the reference count
+for the implicit MR parent is incremented using:
+refcount_inc_not_zero().
+
+A corresponding decrement must be performed if
+free_implicit_child_mr_work() is not called.
+
+The code has been updated to properly manage the reference count that
+was incremented.
+
+[1]
+INFO: task python3:2157 blocked for more than 120 seconds.
+Not tainted 6.12.0-rc7+ #1633
+"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+task:python3         state:D stack:0     pid:2157 tgid:2157  ppid:1685   flags:0x00000000
+Call Trace:
+<TASK>
+__schedule+0x420/0xd30
+schedule+0x47/0x130
+__mlx5_ib_dereg_mr+0x379/0x5d0 [mlx5_ib]
+? __pfx_autoremove_wake_function+0x10/0x10
+ib_dereg_mr_user+0x5f/0x120 [ib_core]
+? lock_release+0xc6/0x280
+destroy_hw_idr_uobject+0x1d/0x60 [ib_uverbs]
+uverbs_destroy_uobject+0x58/0x1d0 [ib_uverbs]
+uobj_destroy+0x3f/0x70 [ib_uverbs]
+ib_uverbs_cmd_verbs+0x3e4/0xbb0 [ib_uverbs]
+? __pfx_uverbs_destroy_def_handler+0x10/0x10 [ib_uverbs]
+? lock_acquire+0xc1/0x2f0
+? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs]
+? ib_uverbs_ioctl+0x116/0x170 [ib_uverbs]
+? lock_release+0xc6/0x280
+ib_uverbs_ioctl+0xe7/0x170 [ib_uverbs]
+? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs]
+ __x64_sys_ioctl+0x1b0/0xa70
+? kmem_cache_free+0x221/0x400
+do_syscall_64+0x6b/0x140
+entry_SYSCALL_64_after_hwframe+0x76/0x7e
+RIP: 0033:0x7f20f21f017b
+RSP: 002b:00007ffcfc4a77c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+RAX: ffffffffffffffda RBX: 00007ffcfc4a78d8 RCX: 00007f20f21f017b
+RDX: 00007ffcfc4a78c0 RSI: 00000000c0181b01 RDI: 0000000000000003
+RBP: 00007ffcfc4a78a0 R08: 000056147d125190 R09: 00007f20f1f14c60
+R10: 0000000000000001 R11: 0000000000000246 R12: 00007ffcfc4a7890
+R13: 000000000000001c R14: 000056147d100fc0 R15: 00007f20e365c9d0
+</TASK>
+
+Fixes: d3d930411ce3 ("RDMA/mlx5: Fix implicit ODP use after free")
+Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
+Reviewed-by: Artemy Kovalyov <artemyko@nvidia.com>
+Link: https://patch.msgid.link/80f2fcd19952dfa7d9981d93fd6359b4471f8278.1739186929.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mlx5/odp.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
+index 1d3bf56157702..b4e2a6f9cb9c3 100644
+--- a/drivers/infiniband/hw/mlx5/odp.c
++++ b/drivers/infiniband/hw/mlx5/odp.c
+@@ -242,6 +242,7 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
+       if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_KERNEL) !=
+           mr) {
+               xa_unlock(&imr->implicit_children);
++              mlx5r_deref_odp_mkey(&imr->mmkey);
+               return;
+       }
+-- 
+2.39.5
+
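The bug class in miniature: a reference taken on entry must be dropped on every exit path, including the early return where the free work is never queued. deref() stands in for mlx5r_deref_odp_mkey().

#include <stdio.h>

static int refs = 1;

static void ref(void)   { refs++; }
static void deref(void) { refs--; }

static void destroy_unused_child(int lost_race)
{
    ref();                 /* refcount_inc_not_zero() on the parent */

    if (lost_race) {
        deref();           /* the fix: balance before the early return */
        return;
    }

    /* ... queue free_implicit_child_mr_work(), which does the deref ... */
    deref();
}

int main(void)
{
    destroy_unused_child(1);
    printf("refs = %d (must be 1, or dereg waits forever)\n", refs);
    return 0;
}
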
diff --git a/queue-6.13/rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch b/queue-6.13/rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch
new file mode 100644 (file)
index 0000000..4c4259a
--- /dev/null
@@ -0,0 +1,209 @@
+From bd0213cbab0b888f59b5e601a2331f8083610f9e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 19 Jan 2025 14:36:13 +0200
+Subject: RDMA/mlx5: Fix the recovery flow of the UMR QP
+
+From: Yishai Hadas <yishaih@nvidia.com>
+
+[ Upstream commit d97505baea64d93538b16baf14ce7b8c1fbad746 ]
+
+This patch addresses an issue in the recovery flow of the UMR QP,
+ensuring tasks do not get stuck, as highlighted by the call trace [1].
+
+During recovery, before transitioning the QP to the RESET state, the
+software must wait for all outstanding WRs to complete.
+
+Failing to do so can cause the firmware to skip sending some flushed
+CQEs with errors and simply discard them upon the RESET, as per the IB
+specification.
+
+This race condition can result in lost CQEs and tasks becoming stuck.
+
+To resolve this, the patch sends a final WR which serves only as a
+barrier before moving the QP state to RESET.
+
+Once a CQE is received for that final WR, it guarantees that no
+outstanding WRs remain, making it safe to transition the QP to RESET and
+subsequently back to RTS, restoring proper functionality.
+
+Note:
+For the barrier WR, we simply reuse the failed and ready WR.
+Since the QP is in an error state, it will only receive
+IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier we don't
+care about its status.
+
+[1]
+INFO: task rdma_resource_l:1922 blocked for more than 120 seconds.
+Tainted: G        W          6.12.0-rc7+ #1626
+"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+task:rdma_resource_l state:D stack:0  pid:1922 tgid:1922  ppid:1369
+     flags:0x00004004
+Call Trace:
+<TASK>
+__schedule+0x420/0xd30
+schedule+0x47/0x130
+schedule_timeout+0x280/0x300
+? mark_held_locks+0x48/0x80
+? lockdep_hardirqs_on_prepare+0xe5/0x1a0
+wait_for_completion+0x75/0x130
+mlx5r_umr_post_send_wait+0x3c2/0x5b0 [mlx5_ib]
+? __pfx_mlx5r_umr_done+0x10/0x10 [mlx5_ib]
+mlx5r_umr_revoke_mr+0x93/0xc0 [mlx5_ib]
+__mlx5_ib_dereg_mr+0x299/0x520 [mlx5_ib]
+? _raw_spin_unlock_irq+0x24/0x40
+? wait_for_completion+0xfe/0x130
+? rdma_restrack_put+0x63/0xe0 [ib_core]
+ib_dereg_mr_user+0x5f/0x120 [ib_core]
+? lock_release+0xc6/0x280
+destroy_hw_idr_uobject+0x1d/0x60 [ib_uverbs]
+uverbs_destroy_uobject+0x58/0x1d0 [ib_uverbs]
+uobj_destroy+0x3f/0x70 [ib_uverbs]
+ib_uverbs_cmd_verbs+0x3e4/0xbb0 [ib_uverbs]
+? __pfx_uverbs_destroy_def_handler+0x10/0x10 [ib_uverbs]
+? __lock_acquire+0x64e/0x2080
+? mark_held_locks+0x48/0x80
+? find_held_lock+0x2d/0xa0
+? lock_acquire+0xc1/0x2f0
+? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs]
+? __fget_files+0xc3/0x1b0
+ib_uverbs_ioctl+0xe7/0x170 [ib_uverbs]
+? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs]
+__x64_sys_ioctl+0x1b0/0xa70
+do_syscall_64+0x6b/0x140
+entry_SYSCALL_64_after_hwframe+0x76/0x7e
+RIP: 0033:0x7f99c918b17b
+RSP: 002b:00007ffc766d0468 EFLAGS: 00000246 ORIG_RAX:
+     0000000000000010
+RAX: ffffffffffffffda RBX: 00007ffc766d0578 RCX:
+     00007f99c918b17b
+RDX: 00007ffc766d0560 RSI: 00000000c0181b01 RDI:
+     0000000000000003
+RBP: 00007ffc766d0540 R08: 00007f99c8f99010 R09:
+     000000000000bd7e
+R10: 00007f99c94c1c70 R11: 0000000000000246 R12:
+     00007ffc766d0530
+R13: 000000000000001c R14: 0000000040246a80 R15:
+     0000000000000000
+</TASK>
+
+Fixes: 158e71bb69e3 ("RDMA/mlx5: Add a umr recovery flow")
+Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
+Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
+Link: https://patch.msgid.link/27b51b92ec42dfb09d8096fcbd51878f397ce6ec.1737290141.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mlx5/umr.c | 83 +++++++++++++++++++++-----------
+ 1 file changed, 56 insertions(+), 27 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
+index 887fd6fa3ba93..793f3c5c4d012 100644
+--- a/drivers/infiniband/hw/mlx5/umr.c
++++ b/drivers/infiniband/hw/mlx5/umr.c
+@@ -231,30 +231,6 @@ void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev)
+       ib_dealloc_pd(dev->umrc.pd);
+ }
+-static int mlx5r_umr_recover(struct mlx5_ib_dev *dev)
+-{
+-      struct umr_common *umrc = &dev->umrc;
+-      struct ib_qp_attr attr;
+-      int err;
+-
+-      attr.qp_state = IB_QPS_RESET;
+-      err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
+-      if (err) {
+-              mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
+-              goto err;
+-      }
+-
+-      err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
+-      if (err)
+-              goto err;
+-
+-      umrc->state = MLX5_UMR_STATE_ACTIVE;
+-      return 0;
+-
+-err:
+-      umrc->state = MLX5_UMR_STATE_ERR;
+-      return err;
+-}
+ static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
+                              struct mlx5r_umr_wqe *wqe, bool with_data)
+@@ -302,6 +278,61 @@ static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
+       return err;
+ }
++static int mlx5r_umr_recover(struct mlx5_ib_dev *dev, u32 mkey,
++                           struct mlx5r_umr_context *umr_context,
++                           struct mlx5r_umr_wqe *wqe, bool with_data)
++{
++      struct umr_common *umrc = &dev->umrc;
++      struct ib_qp_attr attr;
++      int err;
++
++      mutex_lock(&umrc->lock);
++      /* Preventing any further WRs to be sent now */
++      if (umrc->state != MLX5_UMR_STATE_RECOVER) {
++              mlx5_ib_warn(dev, "UMR recovery encountered an unexpected state=%d\n",
++                           umrc->state);
++              umrc->state = MLX5_UMR_STATE_RECOVER;
++      }
++      mutex_unlock(&umrc->lock);
++
++      /* Sending a final/barrier WR (the failed one) and wait for its completion.
++       * This will ensure that all the previous WRs got a completion before
++       * we set the QP state to RESET.
++       */
++      err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe, wqe,
++                                with_data);
++      if (err) {
++              mlx5_ib_warn(dev, "UMR recovery post send failed, err %d\n", err);
++              goto err;
++      }
++
++      /* Since the QP is in an error state, it will only receive
++       * IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier
++       * we don't care about its status.
++       */
++      wait_for_completion(&umr_context->done);
++
++      attr.qp_state = IB_QPS_RESET;
++      err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
++      if (err) {
++              mlx5_ib_warn(dev, "Couldn't modify UMR QP to RESET, err=%d\n", err);
++              goto err;
++      }
++
++      err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
++      if (err) {
++              mlx5_ib_warn(dev, "Couldn't modify UMR QP to RTS, err=%d\n", err);
++              goto err;
++      }
++
++      umrc->state = MLX5_UMR_STATE_ACTIVE;
++      return 0;
++
++err:
++      umrc->state = MLX5_UMR_STATE_ERR;
++      return err;
++}
++
+ static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
+ {
+       struct mlx5_ib_umr_context *context =
+@@ -366,9 +397,7 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
+               mlx5_ib_warn(dev,
+                       "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n",
+                       umr_context.status, mkey);
+-              mutex_lock(&umrc->lock);
+-              err = mlx5r_umr_recover(dev);
+-              mutex_unlock(&umrc->lock);
++              err = mlx5r_umr_recover(dev, mkey, &umr_context, wqe, with_data);
+               if (err)
+                       mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
+                                    err);
+-- 
+2.39.5
+
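The drain-before-reset idea, modeled with a thread: post one final sentinel item and wait for its completion; since completions for one queue arrive in order, its arrival guarantees nothing earlier is still in flight, and the queue can be reset safely. This models the barrier WR pattern only, not the verbs API.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  done = PTHREAD_COND_INITIALIZER;
static int completed;

/* Completion thread: flushes all queued work, sentinel last. */
static void *flush_cq(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&lock);
    completed = 1;             /* sentinel completion observed */
    pthread_cond_signal(&done);
    pthread_mutex_unlock(&lock);
    return NULL;
}

int main(void)
{
    pthread_t t;

    /* 1. Post the barrier (the reused, failed WR) ... */
    pthread_create(&t, NULL, flush_cq, NULL);

    /* 2. ... and wait for its completion before touching QP state. */
    pthread_mutex_lock(&lock);
    while (!completed)
        pthread_cond_wait(&done, &lock);
    pthread_mutex_unlock(&lock);
    pthread_join(t, NULL);

    /* 3. Now it is safe: RESET -> RTS without losing flushed CQEs. */
    printf("all WRs drained; resetting QP\n");
    return 0;
}
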
diff --git a/queue-6.13/scsi-core-clear-driver-private-data-when-retrying-re.patch b/queue-6.13/scsi-core-clear-driver-private-data-when-retrying-re.patch
new file mode 100644 (file)
index 0000000..f2dce4b
--- /dev/null
@@ -0,0 +1,68 @@
+From 7c67b6d2cbea5efcfb6e92805b57d24b6aa87fe9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Feb 2025 10:16:28 +0800
+Subject: scsi: core: Clear driver private data when retrying request
+
+From: Ye Bin <yebin10@huawei.com>
+
+[ Upstream commit dce5c4afd035e8090a26e5d776b1682c0e649683 ]
+
+After commit 1bad6c4a57ef ("scsi: zero per-cmd private driver data for each
+MQ I/O"), the xen-scsifront/virtio_scsi/snic drivers all removed code that
+explicitly zeroed driver-private command data.
+
+In combination with commit 464a00c9e0ad ("scsi: core: Kill DRIVER_SENSE"),
+after virtio_scsi performs a capacity expansion, the first request will
+return a unit attention to indicate that the capacity has changed. The
+original command is then retried; since the driver-private command data
+was not cleared, the request would return the UA again and eventually
+time out and fail.
+
+Zero driver-private command data when a request is retried.
+
+Fixes: f7de50da1479 ("scsi: xen-scsifront: Remove code that zeroes driver-private command data")
+Fixes: c2bb87318baa ("scsi: virtio_scsi: Remove code that zeroes driver-private command data")
+Fixes: c3006a926468 ("scsi: snic: Remove code that zeroes driver-private command data")
+Signed-off-by: Ye Bin <yebin10@huawei.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Link: https://lore.kernel.org/r/20250217021628.2929248-1-yebin@huaweicloud.com
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/scsi_lib.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
+index 5f9b107ae267f..43766589bfc6e 100644
+--- a/drivers/scsi/scsi_lib.c
++++ b/drivers/scsi/scsi_lib.c
+@@ -1656,13 +1656,6 @@ static blk_status_t scsi_prepare_cmd(struct request *req)
+       if (in_flight)
+               __set_bit(SCMD_STATE_INFLIGHT, &cmd->state);
+-      /*
+-       * Only clear the driver-private command data if the LLD does not supply
+-       * a function to initialize that data.
+-       */
+-      if (!shost->hostt->init_cmd_priv)
+-              memset(cmd + 1, 0, shost->hostt->cmd_size);
+-
+       cmd->prot_op = SCSI_PROT_NORMAL;
+       if (blk_rq_bytes(req))
+               cmd->sc_data_direction = rq_dma_dir(req);
+@@ -1829,6 +1822,13 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
+       if (!scsi_host_queue_ready(q, shost, sdev, cmd))
+               goto out_dec_target_busy;
++      /*
++       * Only clear the driver-private command data if the LLD does not supply
++       * a function to initialize that data.
++       */
++      if (shost->hostt->cmd_size && !shost->hostt->init_cmd_priv)
++              memset(cmd + 1, 0, shost->hostt->cmd_size);
++
+       if (!(req->rq_flags & RQF_DONTPREP)) {
+               ret = scsi_prepare_cmd(req);
+               if (ret != BLK_STS_OK)
+-- 
+2.39.5
+
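What the moved hunk does, in a freestanding sketch: the LLD's private data lives directly behind the command struct, and the queue path zeroes it on every dispatch, retries included, unless the LLD initializes it itself. The layout mirrors the memset(cmd + 1, ...) idiom; sizes are illustrative.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct scsi_cmnd { int state; /* driver-private data follows */ };

static void queue_rq(struct scsi_cmnd *cmd, size_t cmd_size, int lld_inits)
{
    /* Clear the trailing private area unless the LLD owns its init. */
    if (cmd_size && !lld_inits)
        memset(cmd + 1, 0, cmd_size);
}

int main(void)
{
    size_t cmd_size = 64;                 /* LLD-declared extra bytes */
    struct scsi_cmnd *cmd = malloc(sizeof(*cmd) + cmd_size);

    memset(cmd + 1, 0xaa, cmd_size);      /* stale data from 1st try */
    queue_rq(cmd, cmd_size, 0);           /* retry: wiped again */
    printf("first private byte after retry: %d\n", ((char *)(cmd + 1))[0]);
    free(cmd);
    return 0;
}
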
diff --git a/queue-6.13/scsi-ufs-core-fix-ufshcd_is_ufs_dev_busy-and-ufshcd_.patch b/queue-6.13/scsi-ufs-core-fix-ufshcd_is_ufs_dev_busy-and-ufshcd_.patch
new file mode 100644 (file)
index 0000000..3517ede
--- /dev/null
@@ -0,0 +1,62 @@
+From 6087445080212aee14927669f83fbc10f1d0e021 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Feb 2025 14:43:44 -0800
+Subject: scsi: ufs: core: Fix ufshcd_is_ufs_dev_busy() and
+ ufshcd_eh_timed_out()
+
+From: Bart Van Assche <bvanassche@acm.org>
+
+[ Upstream commit 4fa382be430421e1445f9c95c4dc9b7e0949ae8a ]
+
+ufshcd_is_ufs_dev_busy(), ufshcd_print_host_state() and
+ufshcd_eh_timed_out() are used in both modes (legacy mode and MCQ mode).
+hba->outstanding_reqs only represents the outstanding requests in legacy
+mode. Hence, change hba->outstanding_reqs into scsi_host_busy(hba->host) in
+these functions.
+
+Fixes: eacb139b77ff ("scsi: ufs: core: mcq: Enable multi-circular queue")
+Signed-off-by: Bart Van Assche <bvanassche@acm.org>
+Link: https://lore.kernel.org/r/20250214224352.3025151-1-bvanassche@acm.org
+Reviewed-by: Peter Wang <peter.wang@mediatek.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ufs/core/ufshcd.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
+index 56b32d245c2ee..37b626e128f95 100644
+--- a/drivers/ufs/core/ufshcd.c
++++ b/drivers/ufs/core/ufshcd.c
+@@ -266,7 +266,7 @@ static bool ufshcd_has_pending_tasks(struct ufs_hba *hba)
+ static bool ufshcd_is_ufs_dev_busy(struct ufs_hba *hba)
+ {
+-      return hba->outstanding_reqs || ufshcd_has_pending_tasks(hba);
++      return scsi_host_busy(hba->host) || ufshcd_has_pending_tasks(hba);
+ }
+ static const struct ufs_dev_quirk ufs_fixups[] = {
+@@ -628,8 +628,8 @@ static void ufshcd_print_host_state(struct ufs_hba *hba)
+       const struct scsi_device *sdev_ufs = hba->ufs_device_wlun;
+       dev_err(hba->dev, "UFS Host state=%d\n", hba->ufshcd_state);
+-      dev_err(hba->dev, "outstanding reqs=0x%lx tasks=0x%lx\n",
+-              hba->outstanding_reqs, hba->outstanding_tasks);
++      dev_err(hba->dev, "%d outstanding reqs, tasks=0x%lx\n",
++              scsi_host_busy(hba->host), hba->outstanding_tasks);
+       dev_err(hba->dev, "saved_err=0x%x, saved_uic_err=0x%x\n",
+               hba->saved_err, hba->saved_uic_err);
+       dev_err(hba->dev, "Device power mode=%d, UIC link state=%d\n",
+@@ -8944,7 +8944,7 @@ static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd)
+       dev_info(hba->dev, "%s() finished; outstanding_tasks = %#lx.\n",
+                __func__, hba->outstanding_tasks);
+-      return hba->outstanding_reqs ? SCSI_EH_RESET_TIMER : SCSI_EH_DONE;
++      return scsi_host_busy(hba->host) ? SCSI_EH_RESET_TIMER : SCSI_EH_DONE;
+ }
+ static const struct attribute_group *ufshcd_driver_groups[] = {
+-- 
+2.39.5
+
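A toy model of why the check had to change: a legacy-mode bitmap only records legacy submissions, while a host-level busy count covers both legacy and MCQ, which is what scsi_host_busy() provides. Purely illustrative.

#include <stdio.h>

static unsigned long legacy_bitmap;  /* legacy-mode requests only */
static int host_busy;                /* all requests, legacy + MCQ */

static void submit(int mcq, int tag)
{
    host_busy++;
    if (!mcq)
        legacy_bitmap |= 1UL << tag;
}

int main(void)
{
    submit(1, 0);   /* an MCQ request */
    printf("bitmap says busy: %d, host says busy: %d\n",
           legacy_bitmap != 0, host_busy != 0);  /* 0 vs 1 */
    return 0;
}
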
diff --git a/queue-6.13/scsi-ufs-core-set-default-runtime-system-pm-levels-b.patch b/queue-6.13/scsi-ufs-core-set-default-runtime-system-pm-levels-b.patch
new file mode 100644 (file)
index 0000000..7fbc149
--- /dev/null
@@ -0,0 +1,83 @@
+From a8d422b16457ecc97ac1d7857e69e1d448522cac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Feb 2025 16:20:47 +0530
+Subject: scsi: ufs: core: Set default runtime/system PM levels before
+ ufshcd_hba_init()
+
+From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+[ Upstream commit fe06b7c07f3fbcce2a2ca6f7b0d543b5699ea00f ]
+
+Commit bb9850704c04 ("scsi: ufs: core: Honor runtime/system PM levels if
+set by host controller drivers") introduced the check for setting default
+PM levels only if the levels are uninitialized by the host controller
+drivers. But it missed the fact that the levels could be initialized to 0
+(UFS_PM_LVL_0) on purpose by the controller drivers. Even though none of
+the drivers do so now, the logic should be fixed regardless.
+
+So set the default levels unconditionally before calling the
+ufshcd_hba_init() API, which initializes the controller drivers. This
+ensures that the controller drivers can override the default levels if
+required.
+
+Fixes: bb9850704c04 ("scsi: ufs: core: Honor runtime/system PM levels if set by host controller drivers")
+Reported-by: Bao D. Nguyen <quic_nguyenb@quicinc.com>
+Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Link: https://lore.kernel.org/r/20250219105047.49932-1-manivannan.sadhasivam@linaro.org
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ufs/core/ufshcd.c | 30 +++++++++++++++---------------
+ 1 file changed, 15 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
+index 37b626e128f95..a5bb6ea96460c 100644
+--- a/drivers/ufs/core/ufshcd.c
++++ b/drivers/ufs/core/ufshcd.c
+@@ -10494,6 +10494,21 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
+        */
+       spin_lock_init(&hba->clk_gating.lock);
++      /*
++       * Set the default power management level for runtime and system PM.
++       * Host controller drivers can override them in their
++       * 'ufs_hba_variant_ops::init' callback.
++       *
++       * Default power saving mode is to keep UFS link in Hibern8 state
++       * and UFS device in sleep state.
++       */
++      hba->rpm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state(
++                                              UFS_SLEEP_PWR_MODE,
++                                              UIC_LINK_HIBERN8_STATE);
++      hba->spm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state(
++                                              UFS_SLEEP_PWR_MODE,
++                                              UIC_LINK_HIBERN8_STATE);
++
+       err = ufshcd_hba_init(hba);
+       if (err)
+               goto out_error;
+@@ -10607,21 +10622,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
+               goto out_disable;
+       }
+-      /*
+-       * Set the default power management level for runtime and system PM if
+-       * not set by the host controller drivers.
+-       * Default power saving mode is to keep UFS link in Hibern8 state
+-       * and UFS device in sleep state.
+-       */
+-      if (!hba->rpm_lvl)
+-              hba->rpm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state(
+-                                              UFS_SLEEP_PWR_MODE,
+-                                              UIC_LINK_HIBERN8_STATE);
+-      if (!hba->spm_lvl)
+-              hba->spm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state(
+-                                              UFS_SLEEP_PWR_MODE,
+-                                              UIC_LINK_HIBERN8_STATE);
+-
+       INIT_DELAYED_WORK(&hba->rpm_dev_flush_recheck_work, ufshcd_rpm_dev_flush_recheck_work);
+       INIT_DELAYED_WORK(&hba->ufs_rtc_update_work, ufshcd_rtc_work);
+-- 
+2.39.5
+
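The ordering pattern the fix adopts, sketched: set the defaults first, then run the variant init callback, which may overwrite them, including to level 0. A set-default-only-if-zero check after init cannot tell "untouched" apart from "deliberately 0". Names are illustrative.

#include <stdio.h>

struct hba { int rpm_lvl, spm_lvl; };

/* Variant driver deliberately wants level 0 (UFS_PM_LVL_0). */
static void variant_init(struct hba *h) { h->rpm_lvl = 0; }

int main(void)
{
    struct hba h;

    h.rpm_lvl = 3;          /* defaults set unconditionally, up front */
    h.spm_lvl = 3;
    variant_init(&h);       /* driver override survives, even 0 */

    printf("rpm=%d spm=%d\n", h.rpm_lvl, h.spm_lvl);  /* 0 3 */
    return 0;
}
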
diff --git a/queue-6.13/series b/queue-6.13/series
new file mode 100644 (file)
index 0000000..d24a1d3
--- /dev/null
@@ -0,0 +1,24 @@
+rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch
+ib-mlx5-set-and-get-correct-qp_num-for-a-dct-qp.patch
+rdma-mlx5-fix-a-race-for-dmabuf-mr-which-can-lead-to.patch
+rdma-mlx5-fix-a-warn-during-dereg_mr-for-dm-type.patch
+rdma-mana_ib-allocate-page-aligned-doorbell-index.patch
+rdma-hns-fix-mbox-timing-out-by-adding-retry-mechani.patch
+rdma-bnxt_re-add-sanity-checks-on-rdev-validity.patch
+rdma-bnxt_re-allocate-dev_attr-information-dynamical.patch
+rdma-bnxt_re-fix-the-statistics-for-gen-p7-vf.patch
+landlock-fix-non-tcp-sockets-restriction.patch
+scsi-ufs-core-fix-ufshcd_is_ufs_dev_busy-and-ufshcd_.patch
+ovl-fix-uaf-in-ovl_dentry_update_reval-by-moving-dpu.patch
+nfs-o_direct-writes-must-check-and-adjust-the-file-l.patch
+nfs-adjust-delegated-timestamps-for-o_direct-reads-a.patch
+sunrpc-prevent-looping-due-to-rpc_signal_task-races.patch
+nfsv4-fix-a-deadlock-when-recovering-state-on-a-sill.patch
+sunrpc-handle-etimedout-return-from-tlshd.patch
+rdma-mlx5-fix-implicit-odp-hang-on-parent-deregistra.patch
+rdma-mlx5-fix-ah-static-rate-parsing.patch
+scsi-core-clear-driver-private-data-when-retrying-re.patch
+scsi-ufs-core-set-default-runtime-system-pm-levels-b.patch
+rdma-mlx5-fix-bind-qp-error-cleanup-flow.patch
+rdma-bnxt_re-fix-the-page-details-for-the-srq-create.patch
+sunrpc-suppress-warnings-for-unused-procfs-functions.patch
diff --git a/queue-6.13/sunrpc-handle-etimedout-return-from-tlshd.patch b/queue-6.13/sunrpc-handle-etimedout-return-from-tlshd.patch
new file mode 100644 (file)
index 0000000..2e13352
--- /dev/null
@@ -0,0 +1,48 @@
+From 4756d0780bc05b8685655e89514225c94d5a2e13 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Feb 2025 12:31:57 -0500
+Subject: SUNRPC: Handle -ETIMEDOUT return from tlshd
+
+From: Benjamin Coddington <bcodding@redhat.com>
+
+[ Upstream commit 7a2f6f7687c5f7083a35317cddec5ad9fa491443 ]
+
+If the TLS handshake attempt returns -ETIMEDOUT, we currently translate
+that error into -EACCES.  This becomes problematic for cases where the RPC
+layer is attempting to re-connect in paths that don't reasonably handle
+-EACCES, for example: writeback.  The RPC layer can handle -ETIMEDOUT
+quite well, however, so if the handshake returns this error, let's just
+pass it along.
+
+Fixes: 75eb6af7acdf ("SUNRPC: Add a TCP-with-TLS RPC transport class")
+Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
+Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sunrpc/xprtsock.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
+index c60936d8cef71..6b80b2aaf7639 100644
+--- a/net/sunrpc/xprtsock.c
++++ b/net/sunrpc/xprtsock.c
+@@ -2581,7 +2581,15 @@ static void xs_tls_handshake_done(void *data, int status, key_serial_t peerid)
+       struct sock_xprt *lower_transport =
+                               container_of(lower_xprt, struct sock_xprt, xprt);
+-      lower_transport->xprt_err = status ? -EACCES : 0;
++      switch (status) {
++      case 0:
++      case -EACCES:
++      case -ETIMEDOUT:
++              lower_transport->xprt_err = status;
++              break;
++      default:
++              lower_transport->xprt_err = -EACCES;
++      }
+       complete(&lower_transport->handshake_done);
+       xprt_put(lower_xprt);
+ }
+-- 
+2.39.5
+
diff --git a/queue-6.13/sunrpc-prevent-looping-due-to-rpc_signal_task-races.patch b/queue-6.13/sunrpc-prevent-looping-due-to-rpc_signal_task-races.patch
new file mode 100644 (file)
index 0000000..2ac9e59
--- /dev/null
@@ -0,0 +1,79 @@
+From 73cd5d8700db85dfc73da303d3c91ea267ca5d6c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 1 Feb 2025 15:00:02 -0500
+Subject: SUNRPC: Prevent looping due to rpc_signal_task() races
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 5bbd6e863b15a85221e49b9bdb2d5d8f0bb91f3d ]
+
+If rpc_signal_task() is called while a task is in an rpc_call_done()
+callback function, and the latter calls rpc_restart_call(), the task can
+end up looping due to the RPC_TASK_SIGNALLED flag being set without the
+tk_rpc_status being set.
+Removing the redundant mechanism for signalling the task fixes the
+looping behaviour.
+
+Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
+Fixes: 39494194f93b ("SUNRPC: Fix races with rpc_killall_tasks()")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sunrpc/sched.h  | 3 +--
+ include/trace/events/sunrpc.h | 3 +--
+ net/sunrpc/sched.c            | 2 --
+ 3 files changed, 2 insertions(+), 6 deletions(-)
+
+diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
+index fec1e8a1570c3..eac57914dcf32 100644
+--- a/include/linux/sunrpc/sched.h
++++ b/include/linux/sunrpc/sched.h
+@@ -158,7 +158,6 @@ enum {
+       RPC_TASK_NEED_XMIT,
+       RPC_TASK_NEED_RECV,
+       RPC_TASK_MSG_PIN_WAIT,
+-      RPC_TASK_SIGNALLED,
+ };
+ #define rpc_test_and_set_running(t) \
+@@ -171,7 +170,7 @@ enum {
+ #define RPC_IS_ACTIVATED(t)   test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)
+-#define RPC_SIGNALLED(t)      test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate)
++#define RPC_SIGNALLED(t)      (READ_ONCE(task->tk_rpc_status) == -ERESTARTSYS)
+ /*
+  * Task priorities.
+diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
+index b13dc275ef4a7..851841336ee65 100644
+--- a/include/trace/events/sunrpc.h
++++ b/include/trace/events/sunrpc.h
+@@ -360,8 +360,7 @@ TRACE_EVENT(rpc_request,
+               { (1UL << RPC_TASK_ACTIVE), "ACTIVE" },                 \
+               { (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" },           \
+               { (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" },           \
+-              { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" },     \
+-              { (1UL << RPC_TASK_SIGNALLED), "SIGNALLED" })
++              { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" })
+ DECLARE_EVENT_CLASS(rpc_task_running,
+diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
+index cef623ea15060..9b45fbdc90cab 100644
+--- a/net/sunrpc/sched.c
++++ b/net/sunrpc/sched.c
+@@ -864,8 +864,6 @@ void rpc_signal_task(struct rpc_task *task)
+       if (!rpc_task_set_rpc_status(task, -ERESTARTSYS))
+               return;
+       trace_rpc_task_signalled(task, task->tk_action);
+-      set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
+-      smp_mb__after_atomic();
+       queue = READ_ONCE(task->tk_waitqueue);
+       if (queue)
+               rpc_wake_up_queued_task(queue, task);
+-- 
+2.39.5
+
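The race, reduced to a sketch: with two records of the signalled state (a flag and a status), a restart can clear one but not the other and the task loops. Deriving the signalled test from tk_rpc_status alone leaves a single source of truth, which is what the patch does; the code below is a simplified model.

#include <stdio.h>

#define ERESTARTSYS 512

struct task { int rpc_status; };

/* After the fix: signalled state is derived, never stored twice. */
#define SIGNALLED(t) ((t)->rpc_status == -ERESTARTSYS)

static void signal_task(struct task *t)  { t->rpc_status = -ERESTARTSYS; }
static void restart_call(struct task *t) { t->rpc_status = 0; }

int main(void)
{
    struct task t = { 0 };

    signal_task(&t);
    restart_call(&t);   /* clearing the status also clears SIGNALLED() */
    printf("signalled after restart: %d\n", SIGNALLED(&t));  /* 0 */
    return 0;
}
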
diff --git a/queue-6.13/sunrpc-suppress-warnings-for-unused-procfs-functions.patch b/queue-6.13/sunrpc-suppress-warnings-for-unused-procfs-functions.patch
new file mode 100644 (file)
index 0000000..28eb3fd
--- /dev/null
@@ -0,0 +1,71 @@
+From 86293082c396d5197c32462801c786d9ed5ec012 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Feb 2025 15:52:21 +0100
+Subject: sunrpc: suppress warnings for unused procfs functions
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit 1f7a4f98c11fbeb18ed21f3b3a497e90a50ad2e0 ]
+
+There is a warning about unused variables when building with W=1 and no procfs:
+
+net/sunrpc/cache.c:1660:30: error: 'cache_flush_proc_ops' defined but not used [-Werror=unused-const-variable=]
+ 1660 | static const struct proc_ops cache_flush_proc_ops = {
+      |                              ^~~~~~~~~~~~~~~~~~~~
+net/sunrpc/cache.c:1622:30: error: 'content_proc_ops' defined but not used [-Werror=unused-const-variable=]
+ 1622 | static const struct proc_ops content_proc_ops = {
+      |                              ^~~~~~~~~~~~~~~~
+net/sunrpc/cache.c:1598:30: error: 'cache_channel_proc_ops' defined but not used [-Werror=unused-const-variable=]
+ 1598 | static const struct proc_ops cache_channel_proc_ops = {
+      |                              ^~~~~~~~~~~~~~~~~~~~~~
+
+These are used inside an #ifdef, so replacing that with an
+IS_ENABLED() check lets the compiler see how they are used while
+still dropping them during dead code elimination.
+
+Fixes: dbf847ecb631 ("knfsd: allow cache_register to return error on failure")
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Acked-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sunrpc/cache.c | 10 +++-------
+ 1 file changed, 3 insertions(+), 7 deletions(-)
+
+diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
+index 059f6ef1ad189..7fcb0574fc79e 100644
+--- a/net/sunrpc/cache.c
++++ b/net/sunrpc/cache.c
+@@ -1669,12 +1669,14 @@ static void remove_cache_proc_entries(struct cache_detail *cd)
+       }
+ }
+-#ifdef CONFIG_PROC_FS
+ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
+ {
+       struct proc_dir_entry *p;
+       struct sunrpc_net *sn;
++      if (!IS_ENABLED(CONFIG_PROC_FS))
++              return 0;
++
+       sn = net_generic(net, sunrpc_net_id);
+       cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc);
+       if (cd->procfs == NULL)
+@@ -1702,12 +1704,6 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
+       remove_cache_proc_entries(cd);
+       return -ENOMEM;
+ }
+-#else /* CONFIG_PROC_FS */
+-static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
+-{
+-      return 0;
+-}
+-#endif
+ void __init cache_initialize(void)
+ {
+-- 
+2.39.5
+
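The IS_ENABLED() trick, modeled in plain C: the branch body stays visible to the compiler, so unused-variable warnings disappear, yet a constant-false condition still lets dead-code elimination drop it, matching the #ifdef's effect on code size. The macro here is a simplified stand-in for the kernel's.

#include <stdio.h>

/* Simplified stand-in: the kernel's IS_ENABLED() resolves CONFIG_*
 * symbols to 0 or 1 at preprocessing time. */
#define CONFIG_PROC_FS_ENABLED 0

static const char *proc_ops_name = "cache_flush_proc_ops";

static int create_proc_entries(void)
{
    if (!CONFIG_PROC_FS_ENABLED)
        return 0;           /* compiler still "sees" the code below */

    printf("registering %s\n", proc_ops_name);  /* eliminated as dead */
    return 1;
}

int main(void)
{
    return create_proc_entries();
}
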