From: Sasha Levin Date: Fri, 28 Feb 2025 04:41:21 +0000 (-0500) Subject: Fixes for 6.12 X-Git-Tag: v6.6.81~53 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=48b36fd70cf48a34714c8e27b53852ea847d46fa;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.12 Signed-off-by: Sasha Levin --- diff --git a/queue-6.12/ib-mlx5-set-and-get-correct-qp_num-for-a-dct-qp.patch b/queue-6.12/ib-mlx5-set-and-get-correct-qp_num-for-a-dct-qp.patch new file mode 100644 index 0000000000..37be86626f --- /dev/null +++ b/queue-6.12/ib-mlx5-set-and-get-correct-qp_num-for-a-dct-qp.patch @@ -0,0 +1,50 @@ +From 7571a8ee3cbdf3efae669943addd753c98fef80c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 19 Jan 2025 14:39:46 +0200 +Subject: IB/mlx5: Set and get correct qp_num for a DCT QP + +From: Mark Zhang + +[ Upstream commit 12d044770e12c4205fa69535b4fa8a9981fea98f ] + +When a DCT QP is created on an active lag, its dctc.port is assigned +in a round-robin way, which is from 1 to dev->lag_port. In this case +when querying this QP, we may get qp_attr.port_num > 2. +Fix this by setting qp->port when modifying a DCT QP, and read port_num +from qp->port instead of dctc.port when querying it. + +Fixes: 7c4b1ab9f167 ("IB/mlx5: Add DCT RoCE LAG support") +Signed-off-by: Mark Zhang +Reviewed-by: Maher Sanalla +Link: https://patch.msgid.link/94c76bf0adbea997f87ffa27674e0a7118ad92a9.1737290358.git.leon@kernel.org +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/mlx5/qp.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c +index 10ce3b44f645f..0d8a8b109a751 100644 +--- a/drivers/infiniband/hw/mlx5/qp.c ++++ b/drivers/infiniband/hw/mlx5/qp.c +@@ -4547,6 +4547,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, + + set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1); + MLX5_SET(dctc, dctc, counter_set_id, set_id); ++ ++ qp->port = attr->port_num; + } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { + struct mlx5_ib_modify_qp_resp resp = {}; + u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {}; +@@ -5033,7 +5035,7 @@ static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp, + } + + if (qp_attr_mask & IB_QP_PORT) +- qp_attr->port_num = MLX5_GET(dctc, dctc, port); ++ qp_attr->port_num = mqp->port; + if (qp_attr_mask & IB_QP_MIN_RNR_TIMER) + qp_attr->min_rnr_timer = MLX5_GET(dctc, dctc, min_rnr_nak); + if (qp_attr_mask & IB_QP_AV) { +-- +2.39.5 + diff --git a/queue-6.12/landlock-fix-non-tcp-sockets-restriction.patch b/queue-6.12/landlock-fix-non-tcp-sockets-restriction.patch new file mode 100644 index 0000000000..1b4addce43 --- /dev/null +++ b/queue-6.12/landlock-fix-non-tcp-sockets-restriction.patch @@ -0,0 +1,64 @@ +From 58b4b9f39089009c14587752b12480f6b0cc21e1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Feb 2025 17:36:49 +0800 +Subject: landlock: Fix non-TCP sockets restriction +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mikhail Ivanov + +[ Upstream commit 854277e2cc8c75dc3c216c82e72523258fcf65b9 ] + +Use sk_is_tcp() to check if socket is TCP in bind(2) and connect(2) +hooks. + +SMC, MPTCP, SCTP protocols are currently restricted by TCP access +rights. The purpose of TCP access rights is to provide control over +ports that can be used by userland to establish a TCP connection.
+Therefore, it is incorrect to deny bind(2) and connect(2) requests for a +socket of another protocol. + +However, SMC, MPTCP and RDS implementations use TCP internal sockets to +establish communication or even to exchange packets over a TCP +connection [1]. Landlock rules that configure bind(2) and connect(2) +usage for TCP sockets should not cover requests for sockets of such +protocols. These protocols have a different set of security issues and +security properties, therefore, it is necessary to provide the userland +with the ability to distinguish between them (e.g. [2]). + +Control over TCP connection used by other protocols can be achieved with +upcoming support of socket creation control [3]. + +[1] https://lore.kernel.org/all/62336067-18c2-3493-d0ec-6dd6a6d3a1b5@huawei-partners.com/ +[2] https://lore.kernel.org/all/20241204.fahVio7eicim@digikod.net/ +[3] https://lore.kernel.org/all/20240904104824.1844082-1-ivanov.mikhail1@huawei-partners.com/ + +Closes: https://github.com/landlock-lsm/linux/issues/40 +Fixes: fff69fb03dde ("landlock: Support network rules with TCP bind and connect") +Signed-off-by: Mikhail Ivanov +Link: https://lore.kernel.org/r/20250205093651.1424339-2-ivanov.mikhail1@huawei-partners.com +[mic: Format commit message to 72 columns] +Signed-off-by: Mickaël Salaün +Signed-off-by: Sasha Levin +--- + security/landlock/net.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/security/landlock/net.c b/security/landlock/net.c +index d5dcc4407a197..104b6c01fe503 100644 +--- a/security/landlock/net.c ++++ b/security/landlock/net.c +@@ -63,8 +63,7 @@ static int current_check_access_socket(struct socket *const sock, + if (WARN_ON_ONCE(dom->num_layers < 1)) + return -EACCES; + +- /* Checks if it's a (potential) TCP socket. */ +- if (sock->type != SOCK_STREAM) ++ if (!sk_is_tcp(sock->sk)) + return 0; + + /* Checks for minimal header length to safely read sa_family. */ +-- +2.39.5 + diff --git a/queue-6.12/nfs-adjust-delegated-timestamps-for-o_direct-reads-a.patch b/queue-6.12/nfs-adjust-delegated-timestamps-for-o_direct-reads-a.patch new file mode 100644 index 0000000000..ab12eda490 --- /dev/null +++ b/queue-6.12/nfs-adjust-delegated-timestamps-for-o_direct-reads-a.patch @@ -0,0 +1,52 @@ +From 561b13349ae8485d7c6c99b2ce3b264d62cdbb48 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 1 Feb 2025 14:59:03 -0500 +Subject: NFS: Adjust delegated timestamps for O_DIRECT reads and writes + +From: Trond Myklebust + +[ Upstream commit 88025c67fe3c025a0123bc7af50535b97f7af89a ] + +Adjust the timestamps if O_DIRECT is being combined with attribute +delegations.
+ +Fixes: e12912d94137 ("NFSv4: Add support for delegated atime and mtime attributes") +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + fs/nfs/direct.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c +index 2784586f93fc0..c1f1b826888c9 100644 +--- a/fs/nfs/direct.c ++++ b/fs/nfs/direct.c +@@ -56,6 +56,7 @@ + #include + #include + ++#include "delegation.h" + #include "internal.h" + #include "iostat.h" + #include "pnfs.h" +@@ -286,6 +287,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) + nfs_direct_count_bytes(dreq, hdr); + spin_unlock(&dreq->lock); + ++ nfs_update_delegated_atime(dreq->inode); ++ + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + struct page *page = req->wb_page; +@@ -770,6 +773,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) + + spin_lock(&inode->i_lock); + nfs_direct_file_adjust_size_locked(inode, dreq->io_start, dreq->count); ++ nfs_update_delegated_mtime_locked(dreq->inode); + spin_unlock(&inode->i_lock); + + while (!list_empty(&hdr->pages)) { +-- +2.39.5 + diff --git a/queue-6.12/nfs-o_direct-writes-must-check-and-adjust-the-file-l.patch b/queue-6.12/nfs-o_direct-writes-must-check-and-adjust-the-file-l.patch new file mode 100644 index 0000000000..8a341f3389 --- /dev/null +++ b/queue-6.12/nfs-o_direct-writes-must-check-and-adjust-the-file-l.patch @@ -0,0 +1,72 @@ +From ec9ca8102f23e2fd9f3ea3a9548af408d359f193 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 1 Feb 2025 14:59:02 -0500 +Subject: NFS: O_DIRECT writes must check and adjust the file length + +From: Trond Myklebust + +[ Upstream commit fcf857ee1958e9247298251f7615d0c76f1e9b38 ] + +While it is uncommon for delegations to be held while O_DIRECT writes +are in progress, it is possible. The xfstests generic/647 and +generic/729 both end up triggering that state, and end up failing due to +the fact that the file size is not adjusted. 
+ +Reported-by: Chuck Lever +Link: https://bugzilla.kernel.org/show_bug.cgi?id=219738 +Cc: stable@vger.kernel.org +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Stable-dep-of: 88025c67fe3c ("NFS: Adjust delegated timestamps for O_DIRECT reads and writes") +Signed-off-by: Sasha Levin +--- + fs/nfs/direct.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c +index 90079ca134dd3..2784586f93fc0 100644 +--- a/fs/nfs/direct.c ++++ b/fs/nfs/direct.c +@@ -130,6 +130,20 @@ static void nfs_direct_truncate_request(struct nfs_direct_req *dreq, + dreq->count = req_start; + } + ++static void nfs_direct_file_adjust_size_locked(struct inode *inode, ++ loff_t offset, size_t count) ++{ ++ loff_t newsize = offset + (loff_t)count; ++ loff_t oldsize = i_size_read(inode); ++ ++ if (newsize > oldsize) { ++ i_size_write(inode, newsize); ++ NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; ++ trace_nfs_size_grow(inode, newsize); ++ nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); ++ } ++} ++ + /** + * nfs_swap_rw - NFS address space operation for swap I/O + * @iocb: target I/O control block +@@ -732,6 +746,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) + struct nfs_direct_req *dreq = hdr->dreq; + struct nfs_commit_info cinfo; + struct nfs_page *req = nfs_list_entry(hdr->pages.next); ++ struct inode *inode = dreq->inode; + int flags = NFS_ODIRECT_DONE; + + trace_nfs_direct_write_completion(dreq); +@@ -753,6 +768,10 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) + } + spin_unlock(&dreq->lock); + ++ spin_lock(&inode->i_lock); ++ nfs_direct_file_adjust_size_locked(inode, dreq->io_start, dreq->count); ++ spin_unlock(&inode->i_lock); ++ + while (!list_empty(&hdr->pages)) { + + req = nfs_list_entry(hdr->pages.next); +-- +2.39.5 + diff --git a/queue-6.12/nfsv4-fix-a-deadlock-when-recovering-state-on-a-sill.patch b/queue-6.12/nfsv4-fix-a-deadlock-when-recovering-state-on-a-sill.patch new file mode 100644 index 0000000000..c470d3028b --- /dev/null +++ b/queue-6.12/nfsv4-fix-a-deadlock-when-recovering-state-on-a-sill.patch @@ -0,0 +1,108 @@ +From 27ab39054effae8e4d586b2b75e6dfd5a4288fea Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 1 Feb 2025 15:00:09 -0500 +Subject: NFSv4: Fix a deadlock when recovering state on a sillyrenamed file + +From: Trond Myklebust + +[ Upstream commit 8f8df955f078e1a023ee55161935000a67651f38 ] + +If the file is sillyrenamed, and slated for delete on close, it is +possible for a server reboot to trigger an open reclaim, which can again +race with the application call to close(). When that happens, the call +to put_nfs_open_context() can trigger a synchronous delegreturn call +which deadlocks because it is not marked as privileged. + +Instead, ensure that the call to nfs4_inode_return_delegation_on_close() +catches the delegreturn, and schedules it asynchronously.
+ +Reported-by: Li Lingfeng +Fixes: adb4b42d19ae ("Return the delegation when deleting sillyrenamed files") +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + fs/nfs/delegation.c | 37 +++++++++++++++++++++++++++++++++++++ + fs/nfs/delegation.h | 1 + + fs/nfs/nfs4proc.c | 3 +++ + 3 files changed, 41 insertions(+) + +diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c +index 035ba52742a50..4db912f562305 100644 +--- a/fs/nfs/delegation.c ++++ b/fs/nfs/delegation.c +@@ -780,6 +780,43 @@ int nfs4_inode_return_delegation(struct inode *inode) + return 0; + } + ++/** ++ * nfs4_inode_set_return_delegation_on_close - asynchronously return a delegation ++ * @inode: inode to process ++ * ++ * This routine is called to request that the delegation be returned as soon ++ * as the file is closed. If the file is already closed, the delegation is ++ * immediately returned. ++ */ ++void nfs4_inode_set_return_delegation_on_close(struct inode *inode) ++{ ++ struct nfs_delegation *delegation; ++ struct nfs_delegation *ret = NULL; ++ ++ if (!inode) ++ return; ++ rcu_read_lock(); ++ delegation = nfs4_get_valid_delegation(inode); ++ if (!delegation) ++ goto out; ++ spin_lock(&delegation->lock); ++ if (!delegation->inode) ++ goto out_unlock; ++ if (list_empty(&NFS_I(inode)->open_files) && ++ !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { ++ /* Refcount matched in nfs_end_delegation_return() */ ++ ret = nfs_get_delegation(delegation); ++ } else ++ set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); ++out_unlock: ++ spin_unlock(&delegation->lock); ++ if (ret) ++ nfs_clear_verifier_delegated(inode); ++out: ++ rcu_read_unlock(); ++ nfs_end_delegation_return(inode, ret, 0); ++} ++ + /** + * nfs4_inode_return_delegation_on_close - asynchronously return a delegation + * @inode: inode to process +diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h +index 71524d34ed207..8ff5ab9c5c256 100644 +--- a/fs/nfs/delegation.h ++++ b/fs/nfs/delegation.h +@@ -49,6 +49,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, + unsigned long pagemod_limit, u32 deleg_type); + int nfs4_inode_return_delegation(struct inode *inode); + void nfs4_inode_return_delegation_on_close(struct inode *inode); ++void nfs4_inode_set_return_delegation_on_close(struct inode *inode); + int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); + void nfs_inode_evict_delegation(struct inode *inode); + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 405f17e6e0b45..e7bc99c69743c 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -3898,8 +3898,11 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, + + static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) + { ++ struct dentry *dentry = ctx->dentry; + if (ctx->state == NULL) + return; ++ if (dentry->d_flags & DCACHE_NFSFS_RENAMED) ++ nfs4_inode_set_return_delegation_on_close(d_inode(dentry)); + if (is_sync) + nfs4_close_sync(ctx->state, _nfs4_ctx_to_openmode(ctx)); + else +-- +2.39.5 + diff --git a/queue-6.12/ovl-fix-uaf-in-ovl_dentry_update_reval-by-moving-dpu.patch b/queue-6.12/ovl-fix-uaf-in-ovl_dentry_update_reval-by-moving-dpu.patch new file mode 100644 index 0000000000..fe9af0a6ff --- /dev/null +++ b/queue-6.12/ovl-fix-uaf-in-ovl_dentry_update_reval-by-moving-dpu.patch @@ -0,0 +1,71 @@ +From d462cfbf9ea6242c02ab24c534d838dd40be7128 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 15 Feb 2025 00:51:48 +0300 +Subject: ovl: fix UAF in ovl_dentry_update_reval by moving dput() in + ovl_link_up + +From: Vasiliy Kovalev + +[ Upstream commit c84e125fff2615b4d9c259e762596134eddd2f27 ] + +The issue was caused by dput(upper) being called before +ovl_dentry_update_reval(), while upper->d_flags was still +accessed in ovl_dentry_remote(). + +Move dput(upper) after its last use to prevent use-after-free. + +BUG: KASAN: slab-use-after-free in ovl_dentry_remote fs/overlayfs/util.c:162 [inline] +BUG: KASAN: slab-use-after-free in ovl_dentry_update_reval+0xd2/0xf0 fs/overlayfs/util.c:167 + +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:114 + print_address_description mm/kasan/report.c:377 [inline] + print_report+0xc3/0x620 mm/kasan/report.c:488 + kasan_report+0xd9/0x110 mm/kasan/report.c:601 + ovl_dentry_remote fs/overlayfs/util.c:162 [inline] + ovl_dentry_update_reval+0xd2/0xf0 fs/overlayfs/util.c:167 + ovl_link_up fs/overlayfs/copy_up.c:610 [inline] + ovl_copy_up_one+0x2105/0x3490 fs/overlayfs/copy_up.c:1170 + ovl_copy_up_flags+0x18d/0x200 fs/overlayfs/copy_up.c:1223 + ovl_rename+0x39e/0x18c0 fs/overlayfs/dir.c:1136 + vfs_rename+0xf84/0x20a0 fs/namei.c:4893 +... + + +Fixes: b07d5cc93e1b ("ovl: update of dentry revalidate flags after copy up") +Reported-by: syzbot+316db8a1191938280eb6@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=316db8a1191938280eb6 +Signed-off-by: Vasiliy Kovalev +Link: https://lore.kernel.org/r/20250214215148.761147-1-kovalev@altlinux.org +Reviewed-by: Amir Goldstein +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/overlayfs/copy_up.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c +index b2c78621da44a..4388004a319d0 100644 +--- a/fs/overlayfs/copy_up.c ++++ b/fs/overlayfs/copy_up.c +@@ -619,7 +619,6 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) + err = PTR_ERR(upper); + if (!IS_ERR(upper)) { + err = ovl_do_link(ofs, ovl_dentry_upper(c->dentry), udir, upper); +- dput(upper); + + if (!err) { + /* Restore timestamps on parent (best effort) */ +@@ -627,6 +626,7 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) + ovl_dentry_set_upper_alias(c->dentry); + ovl_dentry_update_reval(c->dentry, upper); + } ++ dput(upper); + } + inode_unlock(udir); + if (err) +-- +2.39.5 + diff --git a/queue-6.12/rdma-bnxt_re-add-sanity-checks-on-rdev-validity.patch b/queue-6.12/rdma-bnxt_re-add-sanity-checks-on-rdev-validity.patch new file mode 100644 index 0000000000..80fc495ea9 --- /dev/null +++ b/queue-6.12/rdma-bnxt_re-add-sanity-checks-on-rdev-validity.patch @@ -0,0 +1,57 @@ +From 7e163a554039ee05ea3ba471957b41863ad8eaa1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 4 Feb 2025 00:21:23 -0800 +Subject: RDMA/bnxt_re: Add sanity checks on rdev validity + +From: Kalesh AP + +[ Upstream commit f0df225d12fcb049429fb5bf5122afe143c2dd15 ] + +There is a possibility that ulp_irq_stop and ulp_irq_start +callbacks will be called when the device is in detached state. +This can cause a crash due to NULL pointer dereference as +the rdev is already freed.
+ +Fixes: cc5b9b48d447 ("RDMA/bnxt_re: Recover the device when FW error is detected") +Signed-off-by: Kalesh AP +Signed-off-by: Selvin Xavier +Link: https://patch.msgid.link/1738657285-23968-3-git-send-email-selvin.xavier@broadcom.com +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/bnxt_re/main.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c +index 08cc9ea175276..9fd83189d00a5 100644 +--- a/drivers/infiniband/hw/bnxt_re/main.c ++++ b/drivers/infiniband/hw/bnxt_re/main.c +@@ -314,6 +314,8 @@ static void bnxt_re_stop_irq(void *handle) + int indx; + + rdev = en_info->rdev; ++ if (!rdev) ++ return; + rcfw = &rdev->rcfw; + + for (indx = BNXT_RE_NQ_IDX; indx < rdev->nqr->num_msix; indx++) { +@@ -334,6 +336,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) + int indx, rc; + + rdev = en_info->rdev; ++ if (!rdev) ++ return; + msix_ent = rdev->nqr->msix_entries; + rcfw = &rdev->rcfw; + if (!ent) { +@@ -2077,6 +2081,7 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state) + ibdev_info(&rdev->ibdev, "%s: L2 driver notified to stop en_state 0x%lx", + __func__, en_dev->en_state); + bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, adev); ++ bnxt_re_update_en_info_rdev(NULL, en_info, adev); + mutex_unlock(&bnxt_re_mutex); + + return 0; +-- +2.39.5 + diff --git a/queue-6.12/rdma-bnxt_re-allocate-dev_attr-information-dynamical.patch b/queue-6.12/rdma-bnxt_re-allocate-dev_attr-information-dynamical.patch new file mode 100644 index 0000000000..adc6c9c880 --- /dev/null +++ b/queue-6.12/rdma-bnxt_re-allocate-dev_attr-information-dynamical.patch @@ -0,0 +1,398 @@ +From 843299891291ea13c990373737a8904998213be0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 9 Jan 2025 10:18:13 -0800 +Subject: RDMA/bnxt_re: Allocate dev_attr information dynamically + +From: Kalesh AP + +[ Upstream commit 9264cd6aa8f194753507cb6e1f444141e7c79f48 ] + +In order to optimize the size of driver private structure, +the memory for dev_attr is allocated dynamically during the +chip context initialization. In order to make certain runtime +decisions, store dev_attr in the qplib_res structure. 
+ +Signed-off-by: Kalesh AP +Signed-off-by: Selvin Xavier +Link: https://patch.msgid.link/1736446693-6692-3-git-send-email-selvin.xavier@broadcom.com +Signed-off-by: Leon Romanovsky +Stable-dep-of: 8238c7bd8420 ("RDMA/bnxt_re: Fix the statistics for Gen P7 VF") +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/bnxt_re/bnxt_re.h | 2 +- + drivers/infiniband/hw/bnxt_re/hw_counters.c | 2 +- + drivers/infiniband/hw/bnxt_re/ib_verbs.c | 38 ++++++++++----------- + drivers/infiniband/hw/bnxt_re/main.c | 36 ++++++++++++------- + drivers/infiniband/hw/bnxt_re/qplib_res.c | 7 ++-- + drivers/infiniband/hw/bnxt_re/qplib_res.h | 4 +-- + drivers/infiniband/hw/bnxt_re/qplib_sp.c | 4 +-- + drivers/infiniband/hw/bnxt_re/qplib_sp.h | 3 +- + 8 files changed, 51 insertions(+), 45 deletions(-) + +diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h +index 784dc0fbd5268..a316afc0139c8 100644 +--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h ++++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h +@@ -195,7 +195,7 @@ struct bnxt_re_dev { + struct bnxt_re_nq_record *nqr; + + /* Device Resources */ +- struct bnxt_qplib_dev_attr dev_attr; ++ struct bnxt_qplib_dev_attr *dev_attr; + struct bnxt_qplib_ctx qplib_ctx; + struct bnxt_qplib_res qplib_res; + struct bnxt_qplib_dpi dpi_privileged; +diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c +index 1e63f80917483..656c150e38e6f 100644 +--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c ++++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c +@@ -357,7 +357,7 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, + goto done; + } + bnxt_re_copy_err_stats(rdev, stats, err_s); +- if (_is_ext_stats_supported(rdev->dev_attr.dev_cap_flags) && ++ if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags) && + !rdev->is_virtfn) { + rc = bnxt_re_get_ext_stat(rdev, stats); + if (rc) { +diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c +index 11e2b3dee2a53..13c1563c2da62 100644 +--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c ++++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c +@@ -118,7 +118,7 @@ static enum ib_access_flags __to_ib_access_flags(int qflags) + static void bnxt_re_check_and_set_relaxed_ordering(struct bnxt_re_dev *rdev, + struct bnxt_qplib_mrw *qplib_mr) + { +- if (_is_relaxed_ordering_supported(rdev->dev_attr.dev_cap_flags2) && ++ if (_is_relaxed_ordering_supported(rdev->dev_attr->dev_cap_flags2) && + pcie_relaxed_ordering_enabled(rdev->en_dev->pdev)) + qplib_mr->flags |= CMDQ_REGISTER_MR_FLAGS_ENABLE_RO; + } +@@ -143,7 +143,7 @@ int bnxt_re_query_device(struct ib_device *ibdev, + struct ib_udata *udata) + { + struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); +- struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; ++ struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; + + memset(ib_attr, 0, sizeof(*ib_attr)); + memcpy(&ib_attr->fw_ver, dev_attr->fw_ver, +@@ -216,7 +216,7 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num, + struct ib_port_attr *port_attr) + { + struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); +- struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; ++ struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; + int rc; + + memset(port_attr, 0, sizeof(*port_attr)); +@@ -274,8 +274,8 @@ void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str) + struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); + + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d", +- rdev->dev_attr.fw_ver[0], 
rdev->dev_attr.fw_ver[1], +- rdev->dev_attr.fw_ver[2], rdev->dev_attr.fw_ver[3]); ++ rdev->dev_attr->fw_ver[0], rdev->dev_attr->fw_ver[1], ++ rdev->dev_attr->fw_ver[2], rdev->dev_attr->fw_ver[3]); + } + + int bnxt_re_query_pkey(struct ib_device *ibdev, u32 port_num, +@@ -526,7 +526,7 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) + mr->qplib_mr.pd = &pd->qplib_pd; + mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR; + mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags); +- if (!_is_alloc_mr_unified(rdev->dev_attr.dev_cap_flags)) { ++ if (!_is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags)) { + rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr); + if (rc) { + ibdev_err(&rdev->ibdev, "Failed to alloc fence-HW-MR\n"); +@@ -1001,7 +1001,7 @@ static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp, + rdev = qp->rdev; + qplqp = &qp->qplib_qp; + sq = &qplqp->sq; +- dev_attr = &rdev->dev_attr; ++ dev_attr = rdev->dev_attr; + + align = sizeof(struct sq_send_hdr); + ilsize = ALIGN(init_attr->cap.max_inline_data, align); +@@ -1221,7 +1221,7 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, + rdev = qp->rdev; + qplqp = &qp->qplib_qp; + rq = &qplqp->rq; +- dev_attr = &rdev->dev_attr; ++ dev_attr = rdev->dev_attr; + + if (init_attr->srq) { + struct bnxt_re_srq *srq; +@@ -1258,7 +1258,7 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp) + + rdev = qp->rdev; + qplqp = &qp->qplib_qp; +- dev_attr = &rdev->dev_attr; ++ dev_attr = rdev->dev_attr; + + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { + qplqp->rq.max_sge = dev_attr->max_qp_sges; +@@ -1284,7 +1284,7 @@ static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, + rdev = qp->rdev; + qplqp = &qp->qplib_qp; + sq = &qplqp->sq; +- dev_attr = &rdev->dev_attr; ++ dev_attr = rdev->dev_attr; + + sq->max_sge = init_attr->cap.max_send_sge; + entries = init_attr->cap.max_send_wr; +@@ -1337,7 +1337,7 @@ static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, + + rdev = qp->rdev; + qplqp = &qp->qplib_qp; +- dev_attr = &rdev->dev_attr; ++ dev_attr = rdev->dev_attr; + + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { + entries = bnxt_re_init_depth(init_attr->cap.max_send_wr + 1, uctx); +@@ -1386,7 +1386,7 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, + + rdev = qp->rdev; + qplqp = &qp->qplib_qp; +- dev_attr = &rdev->dev_attr; ++ dev_attr = rdev->dev_attr; + + /* Setup misc params */ + ether_addr_copy(qplqp->smac, rdev->netdev->dev_addr); +@@ -1556,7 +1556,7 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr, + ib_pd = ib_qp->pd; + pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); + rdev = pd->rdev; +- dev_attr = &rdev->dev_attr; ++ dev_attr = rdev->dev_attr; + qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); + + uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); +@@ -1783,7 +1783,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, + ib_pd = ib_srq->pd; + pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); + rdev = pd->rdev; +- dev_attr = &rdev->dev_attr; ++ dev_attr = rdev->dev_attr; + srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); + + if (srq_init_attr->attr.max_wr >= dev_attr->max_srq_wqes) { +@@ -1987,7 +1987,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, + { + struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); + struct bnxt_re_dev *rdev = qp->rdev; +- struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; ++ struct bnxt_qplib_dev_attr 
*dev_attr = rdev->dev_attr; + enum ib_qp_state curr_qp_state, new_qp_state; + int rc, entries; + unsigned int flags; +@@ -3011,7 +3011,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata = &attrs->driver_udata; + struct bnxt_re_ucontext *uctx = + rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); +- struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; ++ struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; + struct bnxt_qplib_chip_ctx *cctx; + struct bnxt_qplib_nq *nq = NULL; + unsigned int nq_alloc_cnt; +@@ -3154,7 +3154,7 @@ int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) + + cq = container_of(ibcq, struct bnxt_re_cq, ib_cq); + rdev = cq->rdev; +- dev_attr = &rdev->dev_attr; ++ dev_attr = rdev->dev_attr; + if (!ibcq->uobject) { + ibdev_err(&rdev->ibdev, "Kernel CQ Resize not supported"); + return -EOPNOTSUPP; +@@ -4127,7 +4127,7 @@ static struct ib_mr *__bnxt_re_user_reg_mr(struct ib_pd *ib_pd, u64 length, u64 + mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags); + mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR; + +- if (!_is_alloc_mr_unified(rdev->dev_attr.dev_cap_flags)) { ++ if (!_is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags)) { + rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr); + if (rc) { + ibdev_err(&rdev->ibdev, "Failed to allocate MR rc = %d", rc); +@@ -4219,7 +4219,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) + struct bnxt_re_ucontext *uctx = + container_of(ctx, struct bnxt_re_ucontext, ib_uctx); + struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); +- struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; ++ struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; + struct bnxt_re_user_mmap_entry *entry; + struct bnxt_re_uctx_resp resp = {}; + struct bnxt_re_uctx_req ureq = {}; +diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c +index 9fd83189d00a5..9bd837a5b8a1a 100644 +--- a/drivers/infiniband/hw/bnxt_re/main.c ++++ b/drivers/infiniband/hw/bnxt_re/main.c +@@ -152,6 +152,10 @@ static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) + + if (!rdev->chip_ctx) + return; ++ ++ kfree(rdev->dev_attr); ++ rdev->dev_attr = NULL; ++ + chip_ctx = rdev->chip_ctx; + rdev->chip_ctx = NULL; + rdev->rcfw.res = NULL; +@@ -165,7 +169,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) + { + struct bnxt_qplib_chip_ctx *chip_ctx; + struct bnxt_en_dev *en_dev; +- int rc; ++ int rc = -ENOMEM; + + en_dev = rdev->en_dev; + +@@ -181,23 +185,30 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) + + rdev->qplib_res.cctx = rdev->chip_ctx; + rdev->rcfw.res = &rdev->qplib_res; +- rdev->qplib_res.dattr = &rdev->dev_attr; ++ rdev->dev_attr = kzalloc(sizeof(*rdev->dev_attr), GFP_KERNEL); ++ if (!rdev->dev_attr) ++ goto free_chip_ctx; ++ rdev->qplib_res.dattr = rdev->dev_attr; + rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev); + + bnxt_re_set_drv_mode(rdev); + + bnxt_re_set_db_offset(rdev); + rc = bnxt_qplib_map_db_bar(&rdev->qplib_res); +- if (rc) { +- kfree(rdev->chip_ctx); +- rdev->chip_ctx = NULL; +- return rc; +- } ++ if (rc) ++ goto free_dev_attr; + + if (bnxt_qplib_determine_atomics(en_dev->pdev)) + ibdev_info(&rdev->ibdev, + "platform doesn't support global atomics."); + return 0; ++free_dev_attr: ++ kfree(rdev->dev_attr); ++ rdev->dev_attr = NULL; ++free_chip_ctx: ++ kfree(rdev->chip_ctx); ++ rdev->chip_ctx = NULL; ++ return rc; + } + + /* SR-IOV helper 
functions */ +@@ -219,7 +230,7 @@ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) + struct bnxt_qplib_ctx *ctx; + int i; + +- attr = &rdev->dev_attr; ++ attr = rdev->dev_attr; + ctx = &rdev->qplib_ctx; + + ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT, +@@ -233,7 +244,7 @@ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) + for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) + rdev->qplib_ctx.tqm_ctx.qcount[i] = +- rdev->dev_attr.tqm_alloc_reqs[i]; ++ rdev->dev_attr->tqm_alloc_reqs[i]; + } + + static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf) +@@ -1353,12 +1364,11 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) + + /* Configure and allocate resources for qplib */ + rdev->qplib_res.rcfw = &rdev->rcfw; +- rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr); ++ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw); + if (rc) + goto fail; + +- rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->en_dev->pdev, +- rdev->netdev, &rdev->dev_attr); ++ rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->netdev); + if (rc) + goto fail; + +@@ -1756,7 +1766,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) + rdev->pacing.dbr_pacing = false; + } + } +- rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr); ++ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw); + if (rc) + goto disable_rcfw; + +diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c +index 96ceec1e8199a..02922a0987ad7 100644 +--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c ++++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c +@@ -876,14 +876,13 @@ void bnxt_qplib_free_res(struct bnxt_qplib_res *res) + bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl); + } + +-int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev, +- struct net_device *netdev, +- struct bnxt_qplib_dev_attr *dev_attr) ++int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev) + { ++ struct bnxt_qplib_dev_attr *dev_attr; + int rc; + +- res->pdev = pdev; + res->netdev = netdev; ++ dev_attr = res->dattr; + + rc = bnxt_qplib_alloc_sgid_tbl(res, &res->sgid_tbl, dev_attr->max_sgid); + if (rc) +diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h +index c2f710364e0ff..0bef58bd44e77 100644 +--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h ++++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h +@@ -421,9 +421,7 @@ int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res, + void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res); + int bnxt_qplib_init_res(struct bnxt_qplib_res *res); + void bnxt_qplib_free_res(struct bnxt_qplib_res *res); +-int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev, +- struct net_device *netdev, +- struct bnxt_qplib_dev_attr *dev_attr); ++int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev); + void bnxt_qplib_free_ctx(struct bnxt_qplib_res *res, + struct bnxt_qplib_ctx *ctx); + int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, +diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c +index 3cca7b1395f6a..807439b1acb51 100644 +--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c ++++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c +@@ -88,9 +88,9 @@ static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw, + fw_ver[3] = resp.fw_rsvd; + } + +-int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, +- struct bnxt_qplib_dev_attr *attr) ++int 
bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw) + { ++ struct bnxt_qplib_dev_attr *attr = rcfw->res->dattr; + struct creq_query_func_resp resp = {}; + struct bnxt_qplib_cmdqmsg msg = {}; + struct creq_query_func_resp_sb *sb; +diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h +index ecf3f45fea74f..de959b3c28e01 100644 +--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h ++++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h +@@ -325,8 +325,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, + int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, + struct bnxt_qplib_gid *gid, u16 gid_idx, + const u8 *smac); +-int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, +- struct bnxt_qplib_dev_attr *attr); ++int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw); + int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res, + struct bnxt_qplib_rcfw *rcfw, + struct bnxt_qplib_ctx *ctx); +-- +2.39.5 + diff --git a/queue-6.12/rdma-bnxt_re-cache-msix-info-to-a-local-structure.patch b/queue-6.12/rdma-bnxt_re-cache-msix-info-to-a-local-structure.patch new file mode 100644 index 0000000000..94a7d90e32 --- /dev/null +++ b/queue-6.12/rdma-bnxt_re-cache-msix-info-to-a-local-structure.patch @@ -0,0 +1,109 @@ +From 8e5861f9690ef801c3de1959b7b2ce66423df978 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 14 Nov 2024 01:49:08 -0800 +Subject: RDMA/bnxt_re: Cache MSIx info to a local structure + +From: Kalesh AP + +[ Upstream commit 31bad59805c388f92f3a13174a149c2228301c15 ] + +L2 driver allocates the vectors for RoCE and pass it through the +en_dev structure to RoCE. During probe, cache the MSIx related +info to a local structure. + +Signed-off-by: Selvin Xavier +Signed-off-by: Kalesh AP +Link: https://patch.msgid.link/1731577748-1804-5-git-send-email-selvin.xavier@broadcom.com +Signed-off-by: Leon Romanovsky +Stable-dep-of: f0df225d12fc ("RDMA/bnxt_re: Add sanity checks on rdev validity") +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/bnxt_re/bnxt_re.h | 1 + + drivers/infiniband/hw/bnxt_re/main.c | 18 ++++++++++-------- + 2 files changed, 11 insertions(+), 8 deletions(-) + +diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h +index 2a5cb66402860..784dc0fbd5268 100644 +--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h ++++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h +@@ -157,6 +157,7 @@ struct bnxt_re_pacing { + #define BNXT_RE_MIN_MSIX 2 + #define BNXT_RE_MAX_MSIX BNXT_MAX_ROCE_MSIX + struct bnxt_re_nq_record { ++ struct bnxt_msix_entry msix_entries[BNXT_RE_MAX_MSIX]; + struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX]; + int num_msix; + }; +diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c +index 310a80962d0eb..08cc9ea175276 100644 +--- a/drivers/infiniband/hw/bnxt_re/main.c ++++ b/drivers/infiniband/hw/bnxt_re/main.c +@@ -334,7 +334,7 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) + int indx, rc; + + rdev = en_info->rdev; +- msix_ent = rdev->en_dev->msix_entries; ++ msix_ent = rdev->nqr->msix_entries; + rcfw = &rdev->rcfw; + if (!ent) { + /* Not setting the f/w timeout bit in rcfw. +@@ -350,7 +350,7 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) + * in device sctructure. 
+ */ + for (indx = 0; indx < rdev->nqr->num_msix; indx++) +- rdev->en_dev->msix_entries[indx].vector = ent[indx].vector; ++ rdev->nqr->msix_entries[indx].vector = ent[indx].vector; + + rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector, + false); +@@ -1292,9 +1292,9 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev) + bnxt_qplib_init_res(&rdev->qplib_res); + + for (i = 1; i < rdev->nqr->num_msix ; i++) { +- db_offt = rdev->en_dev->msix_entries[i].db_offset; ++ db_offt = rdev->nqr->msix_entries[i].db_offset; + rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nqr->nq[i - 1], +- i - 1, rdev->en_dev->msix_entries[i].vector, ++ i - 1, rdev->nqr->msix_entries[i].vector, + db_offt, &bnxt_re_cqn_handler, + &bnxt_re_srqn_handler); + if (rc) { +@@ -1381,7 +1381,7 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) + rattr.type = type; + rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX; + rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1; +- rattr.lrid = rdev->en_dev->msix_entries[i + 1].ring_idx; ++ rattr.lrid = rdev->nqr->msix_entries[i + 1].ring_idx; + rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id); + if (rc) { + ibdev_err(&rdev->ibdev, +@@ -1698,6 +1698,8 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) + return rc; + } + rdev->nqr->num_msix = rdev->en_dev->ulp_tbl->msix_requested; ++ memcpy(rdev->nqr->msix_entries, rdev->en_dev->msix_entries, ++ sizeof(struct bnxt_msix_entry) * rdev->nqr->num_msix); + + /* Check whether VF or PF */ + bnxt_re_get_sriov_func_type(rdev); +@@ -1723,14 +1725,14 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) + rattr.type = type; + rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX; + rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1; +- rattr.lrid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx; ++ rattr.lrid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].ring_idx; + rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id); + if (rc) { + ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc); + goto free_rcfw; + } +- db_offt = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].db_offset; +- vid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].vector; ++ db_offt = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].db_offset; ++ vid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].vector; + rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw, + vid, db_offt, + &bnxt_re_aeq_handler); +-- +2.39.5 + diff --git a/queue-6.12/rdma-bnxt_re-fail-probe-early-when-not-enough-msi-x-.patch b/queue-6.12/rdma-bnxt_re-fail-probe-early-when-not-enough-msi-x-.patch new file mode 100644 index 0000000000..e441a4502e --- /dev/null +++ b/queue-6.12/rdma-bnxt_re-fail-probe-early-when-not-enough-msi-x-.patch @@ -0,0 +1,89 @@ +From 7df3d042fc6120aee3467a5f8fa65ed257451bc3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 14 Nov 2024 01:49:05 -0800 +Subject: RDMA/bnxt_re: Fail probe early when not enough MSI-x vectors are + reserved + +From: Kalesh AP + +[ Upstream commit 65ecee132774e0f15cd76a766eb39ec21118bffc ] + +L2 driver allocates and populates the MSI-x vector details for RoCE +in the en_dev structure. RoCE driver requires minimum 2 MSIx vectors. +Hence during probe, driver has to check and bail out if there are not +enough MSI-x vectors reserved for it before proceeding further +initialization. 
+ +Reviewed-by: Andy Gospodarek +Reviewed-by: Ajit Khaparde +Reviewed-by: Hongguang Gao +Reviewed-by: Bhargava Chenna Marreddy +Reviewed-by: Kashyap Desai +Reviewed-by: Chandramohan Akula +Signed-off-by: Kalesh AP +Signed-off-by: Selvin Xavier +Link: https://patch.msgid.link/1731577748-1804-2-git-send-email-selvin.xavier@broadcom.com +Signed-off-by: Leon Romanovsky +Stable-dep-of: f0df225d12fc ("RDMA/bnxt_re: Add sanity checks on rdev validity") +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/bnxt_re/bnxt_re.h | 2 ++ + drivers/infiniband/hw/bnxt_re/main.c | 22 ++++++++++++---------- + 2 files changed, 14 insertions(+), 10 deletions(-) + +diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h +index e94518b12f86e..7a1acad232c5e 100644 +--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h ++++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h +@@ -154,6 +154,8 @@ struct bnxt_re_pacing { + + #define BNXT_RE_GRC_FIFO_REG_BASE 0x2000 + ++#define BNXT_RE_MIN_MSIX 2 ++ + #define MAX_CQ_HASH_BITS (16) + #define MAX_SRQ_HASH_BITS (16) + struct bnxt_re_dev { +diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c +index 8abd1b723f8ff..32ecc802afd13 100644 +--- a/drivers/infiniband/hw/bnxt_re/main.c ++++ b/drivers/infiniband/hw/bnxt_re/main.c +@@ -1653,6 +1653,18 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) + } + set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); + ++ if (rdev->en_dev->ulp_tbl->msix_requested < BNXT_RE_MIN_MSIX) { ++ ibdev_err(&rdev->ibdev, ++ "RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n", ++ rdev->en_dev->ulp_tbl->msix_requested); ++ bnxt_unregister_dev(rdev->en_dev); ++ clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); ++ return -EINVAL; ++ } ++ ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n", ++ rdev->en_dev->ulp_tbl->msix_requested); ++ rdev->num_msix = rdev->en_dev->ulp_tbl->msix_requested; ++ + rc = bnxt_re_setup_chip_ctx(rdev); + if (rc) { + bnxt_unregister_dev(rdev->en_dev); +@@ -1664,16 +1676,6 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) + /* Check whether VF or PF */ + bnxt_re_get_sriov_func_type(rdev); + +- if (!rdev->en_dev->ulp_tbl->msix_requested) { +- ibdev_err(&rdev->ibdev, +- "Failed to get MSI-X vectors: %#x\n", rc); +- rc = -EINVAL; +- goto fail; +- } +- ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n", +- rdev->en_dev->ulp_tbl->msix_requested); +- rdev->num_msix = rdev->en_dev->ulp_tbl->msix_requested; +- + bnxt_re_query_hwrm_intf_version(rdev); + + /* Establish RCFW Communication Channel to initialize the context +-- +2.39.5 + diff --git a/queue-6.12/rdma-bnxt_re-fix-the-page-details-for-the-srq-create.patch b/queue-6.12/rdma-bnxt_re-fix-the-page-details-for-the-srq-create.patch new file mode 100644 index 0000000000..8b5a4e67a5 --- /dev/null +++ b/queue-6.12/rdma-bnxt_re-fix-the-page-details-for-the-srq-create.patch @@ -0,0 +1,65 @@ +From 31f2ff0cf1c5833c4108a64066c93f84038c154e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 22 Feb 2025 07:20:21 -0800 +Subject: RDMA/bnxt_re: Fix the page details for the srq created by kernel + consumers + +From: Kashyap Desai + +[ Upstream commit b66535356a4834a234f99e16a97eb51f2c6c5a7d ] + +While using nvme target with use_srq on, below kernel panic is noticed. + +[ 549.698111] bnxt_en 0000:41:00.0 enp65s0np0: FEC autoneg off encoding: Clause 91 RS(544,514) +[ 566.393619] Oops: divide error: 0000 [#1] PREEMPT SMP NOPTI +.. +[ 566.393799] +[ 566.393807] ? 
__die_body+0x1a/0x60 +[ 566.393823] ? die+0x38/0x60 +[ 566.393835] ? do_trap+0xe4/0x110 +[ 566.393847] ? bnxt_qplib_alloc_init_hwq+0x1d4/0x580 [bnxt_re] +[ 566.393867] ? bnxt_qplib_alloc_init_hwq+0x1d4/0x580 [bnxt_re] +[ 566.393881] ? do_error_trap+0x7c/0x120 +[ 566.393890] ? bnxt_qplib_alloc_init_hwq+0x1d4/0x580 [bnxt_re] +[ 566.393911] ? exc_divide_error+0x34/0x50 +[ 566.393923] ? bnxt_qplib_alloc_init_hwq+0x1d4/0x580 [bnxt_re] +[ 566.393939] ? asm_exc_divide_error+0x16/0x20 +[ 566.393966] ? bnxt_qplib_alloc_init_hwq+0x1d4/0x580 [bnxt_re] +[ 566.393997] bnxt_qplib_create_srq+0xc9/0x340 [bnxt_re] +[ 566.394040] bnxt_re_create_srq+0x335/0x3b0 [bnxt_re] +[ 566.394057] ? srso_return_thunk+0x5/0x5f +[ 566.394068] ? __init_swait_queue_head+0x4a/0x60 +[ 566.394090] ib_create_srq_user+0xa7/0x150 [ib_core] +[ 566.394147] nvmet_rdma_queue_connect+0x7d0/0xbe0 [nvmet_rdma] +[ 566.394174] ? lock_release+0x22c/0x3f0 +[ 566.394187] ? srso_return_thunk+0x5/0x5f + +Page size and shift info is set only for the user space SRQs. +Set page size and page shift for kernel space SRQs also. + +Fixes: 0c4dcd602817 ("RDMA/bnxt_re: Refactor hardware queue memory allocation") +Signed-off-by: Kashyap Desai +Signed-off-by: Selvin Xavier +Link: https://patch.msgid.link/1740237621-29291-1-git-send-email-selvin.xavier@broadcom.com +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c +index 13c1563c2da62..0b21d8b5d9629 100644 +--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c ++++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c +@@ -1815,6 +1815,8 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, + srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; + srq->srq_limit = srq_init_attr->attr.srq_limit; + srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id; ++ srq->qplib_srq.sg_info.pgsize = PAGE_SIZE; ++ srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT; + nq = &rdev->nqr->nq[0]; + + if (udata) { +-- +2.39.5 + diff --git a/queue-6.12/rdma-bnxt_re-fix-the-statistics-for-gen-p7-vf.patch b/queue-6.12/rdma-bnxt_re-fix-the-statistics-for-gen-p7-vf.patch new file mode 100644 index 0000000000..7b0e254f5f --- /dev/null +++ b/queue-6.12/rdma-bnxt_re-fix-the-statistics-for-gen-p7-vf.patch @@ -0,0 +1,60 @@ +From 0a9aabaf9e9f65423b28be525ab35cc76c74fd2e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 4 Feb 2025 00:21:25 -0800 +Subject: RDMA/bnxt_re: Fix the statistics for Gen P7 VF + +From: Selvin Xavier + +[ Upstream commit 8238c7bd84209c8216b1381ab0dbe6db9e203769 ] + +Gen P7 VF support the extended stats and is prevented +by a VF check. Fix the check to issue the FW command +for GenP7 VFs also. 
+ +Fixes: 1801d87b3598 ("RDMA/bnxt_re: Support new 5760X P7 devices") +Signed-off-by: Selvin Xavier +Link: https://patch.msgid.link/1738657285-23968-5-git-send-email-selvin.xavier@broadcom.com +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/bnxt_re/hw_counters.c | 4 ++-- + drivers/infiniband/hw/bnxt_re/qplib_res.h | 8 ++++++++ + 2 files changed, 10 insertions(+), 2 deletions(-) + +diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c +index 656c150e38e6f..f51adb0a97e66 100644 +--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c ++++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c +@@ -357,8 +357,8 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, + goto done; + } + bnxt_re_copy_err_stats(rdev, stats, err_s); +- if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags) && +- !rdev->is_virtfn) { ++ if (bnxt_ext_stats_supported(rdev->chip_ctx, rdev->dev_attr->dev_cap_flags, ++ rdev->is_virtfn)) { + rc = bnxt_re_get_ext_stat(rdev, stats); + if (rc) { + clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, +diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h +index 0bef58bd44e77..b40cff8252bc4 100644 +--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h ++++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h +@@ -544,6 +544,14 @@ static inline bool _is_ext_stats_supported(u16 dev_cap_flags) + CREQ_QUERY_FUNC_RESP_SB_EXT_STATS; + } + ++static inline int bnxt_ext_stats_supported(struct bnxt_qplib_chip_ctx *ctx, ++ u16 flags, bool virtfn) ++{ ++ /* ext stats supported if cap flag is set AND is a PF OR a Thor2 VF */ ++ return (_is_ext_stats_supported(flags) && ++ ((virtfn && bnxt_qplib_is_chip_gen_p7(ctx)) || (!virtfn))); ++} ++ + static inline bool _is_hw_retx_supported(u16 dev_cap_flags) + { + return dev_cap_flags & +-- +2.39.5 + diff --git a/queue-6.12/rdma-bnxt_re-refactor-nq-allocation.patch b/queue-6.12/rdma-bnxt_re-refactor-nq-allocation.patch new file mode 100644 index 0000000000..e3928f4cf8 --- /dev/null +++ b/queue-6.12/rdma-bnxt_re-refactor-nq-allocation.patch @@ -0,0 +1,300 @@ +From 1232ab1f64278b8c5dee1b0e8d7529bf4471410d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 14 Nov 2024 01:49:06 -0800 +Subject: RDMA/bnxt_re: Refactor NQ allocation + +From: Kalesh AP + +[ Upstream commit 30b871338c3ebab4c5efb74f6b23b59f1ac4ca1f ] + +Move NQ related data structures from rdev to a new structure +named "struct bnxt_re_nq_record" by keeping a pointer to it in +the rdev structure. Allocate the memory for it dynamically. +This change is needed for subsequent patches in the series. + +Also, removed the nq_task variable from rdev structure as it +is redundant and no longer used. + +This change would help to reduce the size of the driver private +structure as well.
+ +Reviewed-by: Chandramohan Akula +Signed-off-by: Kalesh AP +Signed-off-by: Selvin Xavier +Link: https://patch.msgid.link/1731577748-1804-3-git-send-email-selvin.xavier@broadcom.com +Signed-off-by: Leon Romanovsky +Stable-dep-of: f0df225d12fc ("RDMA/bnxt_re: Add sanity checks on rdev validity") +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/bnxt_re/bnxt_re.h | 13 +++-- + drivers/infiniband/hw/bnxt_re/ib_verbs.c | 6 +- + drivers/infiniband/hw/bnxt_re/main.c | 74 ++++++++++++++++-------- + 3 files changed, 60 insertions(+), 33 deletions(-) + +diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h +index 7a1acad232c5e..2a5cb66402860 100644 +--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h ++++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h +@@ -155,6 +155,11 @@ struct bnxt_re_pacing { + #define BNXT_RE_GRC_FIFO_REG_BASE 0x2000 + + #define BNXT_RE_MIN_MSIX 2 ++#define BNXT_RE_MAX_MSIX BNXT_MAX_ROCE_MSIX ++struct bnxt_re_nq_record { ++ struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX]; ++ int num_msix; ++}; + + #define MAX_CQ_HASH_BITS (16) + #define MAX_SRQ_HASH_BITS (16) +@@ -176,21 +181,17 @@ struct bnxt_re_dev { + unsigned int version, major, minor; + struct bnxt_qplib_chip_ctx *chip_ctx; + struct bnxt_en_dev *en_dev; +- int num_msix; + + int id; + + struct delayed_work worker; + u8 cur_prio_map; + +- /* FP Notification Queue (CQ & SRQ) */ +- struct tasklet_struct nq_task; +- + /* RCFW Channel */ + struct bnxt_qplib_rcfw rcfw; + +- /* NQ */ +- struct bnxt_qplib_nq nq[BNXT_MAX_ROCE_MSIX]; ++ /* NQ record */ ++ struct bnxt_re_nq_record *nqr; + + /* Device Resources */ + struct bnxt_qplib_dev_attr dev_attr; +diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c +index a7067c3c06797..11e2b3dee2a53 100644 +--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c ++++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c +@@ -1814,8 +1814,8 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, + srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(dev_attr->max_srq_sges); + srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; + srq->srq_limit = srq_init_attr->attr.srq_limit; +- srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id; +- nq = &rdev->nq[0]; ++ srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id; ++ nq = &rdev->nqr->nq[0]; + + if (udata) { + rc = bnxt_re_init_user_srq(rdev, pd, srq, udata); +@@ -3070,7 +3070,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + * used for getting the NQ index. 
+ */ + nq_alloc_cnt = atomic_inc_return(&rdev->nq_alloc_cnt); +- nq = &rdev->nq[nq_alloc_cnt % (rdev->num_msix - 1)]; ++ nq = &rdev->nqr->nq[nq_alloc_cnt % (rdev->nqr->num_msix - 1)]; + cq->qplib_cq.max_wqe = entries; + cq->qplib_cq.cnq_hw_ring_id = nq->ring_id; + cq->qplib_cq.nq = nq; +diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c +index 32ecc802afd13..310a80962d0eb 100644 +--- a/drivers/infiniband/hw/bnxt_re/main.c ++++ b/drivers/infiniband/hw/bnxt_re/main.c +@@ -316,8 +316,8 @@ static void bnxt_re_stop_irq(void *handle) + rdev = en_info->rdev; + rcfw = &rdev->rcfw; + +- for (indx = BNXT_RE_NQ_IDX; indx < rdev->num_msix; indx++) { +- nq = &rdev->nq[indx - 1]; ++ for (indx = BNXT_RE_NQ_IDX; indx < rdev->nqr->num_msix; indx++) { ++ nq = &rdev->nqr->nq[indx - 1]; + bnxt_qplib_nq_stop_irq(nq, false); + } + +@@ -349,7 +349,7 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) + /* Vectors may change after restart, so update with new vectors + * in device sctructure. + */ +- for (indx = 0; indx < rdev->num_msix; indx++) ++ for (indx = 0; indx < rdev->nqr->num_msix; indx++) + rdev->en_dev->msix_entries[indx].vector = ent[indx].vector; + + rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector, +@@ -358,8 +358,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) + ibdev_warn(&rdev->ibdev, "Failed to reinit CREQ\n"); + return; + } +- for (indx = BNXT_RE_NQ_IDX ; indx < rdev->num_msix; indx++) { +- nq = &rdev->nq[indx - 1]; ++ for (indx = BNXT_RE_NQ_IDX ; indx < rdev->nqr->num_msix; indx++) { ++ nq = &rdev->nqr->nq[indx - 1]; + rc = bnxt_qplib_nq_start_irq(nq, indx - 1, + msix_ent[indx].vector, false); + if (rc) { +@@ -943,7 +943,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) + + addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr); + +- ibdev->num_comp_vectors = rdev->num_msix - 1; ++ ibdev->num_comp_vectors = rdev->nqr->num_msix - 1; + ibdev->dev.parent = &rdev->en_dev->pdev->dev; + ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; + +@@ -1276,8 +1276,8 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) + { + int i; + +- for (i = 1; i < rdev->num_msix; i++) +- bnxt_qplib_disable_nq(&rdev->nq[i - 1]); ++ for (i = 1; i < rdev->nqr->num_msix; i++) ++ bnxt_qplib_disable_nq(&rdev->nqr->nq[i - 1]); + + if (rdev->qplib_res.rcfw) + bnxt_qplib_cleanup_res(&rdev->qplib_res); +@@ -1291,9 +1291,9 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev) + + bnxt_qplib_init_res(&rdev->qplib_res); + +- for (i = 1; i < rdev->num_msix ; i++) { ++ for (i = 1; i < rdev->nqr->num_msix ; i++) { + db_offt = rdev->en_dev->msix_entries[i].db_offset; +- rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1], ++ rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nqr->nq[i - 1], + i - 1, rdev->en_dev->msix_entries[i].vector, + db_offt, &bnxt_re_cqn_handler, + &bnxt_re_srqn_handler); +@@ -1307,20 +1307,22 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev) + return 0; + fail: + for (i = num_vec_enabled; i >= 0; i--) +- bnxt_qplib_disable_nq(&rdev->nq[i]); ++ bnxt_qplib_disable_nq(&rdev->nqr->nq[i]); + return rc; + } + + static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev) + { ++ struct bnxt_qplib_nq *nq; + u8 type; + int i; + +- for (i = 0; i < rdev->num_msix - 1; i++) { ++ for (i = 0; i < rdev->nqr->num_msix - 1; i++) { + type = bnxt_qplib_get_ring_type(rdev->chip_ctx); +- bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type); +- 
bnxt_qplib_free_nq(&rdev->nq[i]); +- rdev->nq[i].res = NULL; ++ nq = &rdev->nqr->nq[i]; ++ bnxt_re_net_ring_free(rdev, nq->ring_id, type); ++ bnxt_qplib_free_nq(nq); ++ nq->res = NULL; + } + } + +@@ -1362,12 +1364,12 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) + if (rc) + goto dealloc_res; + +- for (i = 0; i < rdev->num_msix - 1; i++) { ++ for (i = 0; i < rdev->nqr->num_msix - 1; i++) { + struct bnxt_qplib_nq *nq; + +- nq = &rdev->nq[i]; ++ nq = &rdev->nqr->nq[i]; + nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT; +- rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, &rdev->nq[i]); ++ rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, nq); + if (rc) { + ibdev_err(&rdev->ibdev, "Alloc Failed NQ%d rc:%#x", + i, rc); +@@ -1375,7 +1377,7 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) + } + type = bnxt_qplib_get_ring_type(rdev->chip_ctx); + rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr; +- rattr.pages = nq->hwq.pbl[rdev->nq[i].hwq.level].pg_count; ++ rattr.pages = nq->hwq.pbl[rdev->nqr->nq[i].hwq.level].pg_count; + rattr.type = type; + rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX; + rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1; +@@ -1385,7 +1387,7 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) + ibdev_err(&rdev->ibdev, + "Failed to allocate NQ fw id with rc = 0x%x", + rc); +- bnxt_qplib_free_nq(&rdev->nq[i]); ++ bnxt_qplib_free_nq(nq); + goto free_nq; + } + num_vec_created++; +@@ -1394,8 +1396,8 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) + free_nq: + for (i = num_vec_created - 1; i >= 0; i--) { + type = bnxt_qplib_get_ring_type(rdev->chip_ctx); +- bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type); +- bnxt_qplib_free_nq(&rdev->nq[i]); ++ bnxt_re_net_ring_free(rdev, rdev->nqr->nq[i].ring_id, type); ++ bnxt_qplib_free_nq(&rdev->nqr->nq[i]); + } + bnxt_qplib_dealloc_dpi(&rdev->qplib_res, + &rdev->dpi_privileged); +@@ -1584,6 +1586,21 @@ static int bnxt_re_ib_init(struct bnxt_re_dev *rdev) + return rc; + } + ++static int bnxt_re_alloc_nqr_mem(struct bnxt_re_dev *rdev) ++{ ++ rdev->nqr = kzalloc(sizeof(*rdev->nqr), GFP_KERNEL); ++ if (!rdev->nqr) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static void bnxt_re_free_nqr_mem(struct bnxt_re_dev *rdev) ++{ ++ kfree(rdev->nqr); ++ rdev->nqr = NULL; ++} ++ + static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) + { + u8 type; +@@ -1611,11 +1628,12 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) + bnxt_qplib_free_rcfw_channel(&rdev->rcfw); + } + +- rdev->num_msix = 0; ++ rdev->nqr->num_msix = 0; + + if (rdev->pacing.dbr_pacing) + bnxt_re_deinitialize_dbr_pacing(rdev); + ++ bnxt_re_free_nqr_mem(rdev); + bnxt_re_destroy_chip_ctx(rdev); + if (op_type == BNXT_RE_COMPLETE_REMOVE) { + if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) +@@ -1663,7 +1681,6 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) + } + ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n", + rdev->en_dev->ulp_tbl->msix_requested); +- rdev->num_msix = rdev->en_dev->ulp_tbl->msix_requested; + + rc = bnxt_re_setup_chip_ctx(rdev); + if (rc) { +@@ -1673,6 +1690,15 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) + return -EINVAL; + } + ++ rc = bnxt_re_alloc_nqr_mem(rdev); ++ if (rc) { ++ bnxt_re_destroy_chip_ctx(rdev); ++ bnxt_unregister_dev(rdev->en_dev); ++ clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); ++ return rc; ++ } ++ rdev->nqr->num_msix = rdev->en_dev->ulp_tbl->msix_requested; ++ + /* Check whether VF or PF */ + bnxt_re_get_sriov_func_type(rdev); + 
+-- +2.39.5 + diff --git a/queue-6.12/rdma-hns-fix-mbox-timing-out-by-adding-retry-mechani.patch b/queue-6.12/rdma-hns-fix-mbox-timing-out-by-adding-retry-mechani.patch new file mode 100644 index 0000000000..988d1646cf --- /dev/null +++ b/queue-6.12/rdma-hns-fix-mbox-timing-out-by-adding-retry-mechani.patch @@ -0,0 +1,167 @@ +From 01894f14ba742da305a4a6518a0e20216b707ee9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 8 Feb 2025 18:59:30 +0800 +Subject: RDMA/hns: Fix mbox timing out by adding retry mechanism + +From: Junxian Huang + +[ Upstream commit 9747c0c7791d4a5a62018a0c9c563dd2e6f6c1c0 ] + +If a QP is modified to error state and a flush CQE process is triggered, +the subsequent QP destruction mbox can still be successfully posted but +will be blocked in HW until the flush CQE process finishes. This causes +further mbox posting timeouts in driver. The blocking time is related +to QP depth. Considering an extreme case where SQ depth and RQ depth +are both 32K, the blocking time can reach about 135ms. + +This patch adds a retry mechanism for mbox posting. For each try, FW +waits 15ms for HW to complete the previous mbox, otherwise return a +timeout error code to driver. Counting other time consumption in FW, +set 8 tries for mbox posting and a 5ms time gap before each retry to +increase to a sufficient timeout limit. + +Fixes: 0425e3e6e0c7 ("RDMA/hns: Support flush cqe for hip08 in kernel space") +Signed-off-by: Junxian Huang +Link: https://patch.msgid.link/20250208105930.522796-1-huangjunxian6@hisilicon.com +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 64 ++++++++++++++++------ + drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 2 + + 2 files changed, 50 insertions(+), 16 deletions(-) + +diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +index 0144e7210d05a..f5c3e560df58d 100644 +--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c ++++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +@@ -1286,10 +1286,8 @@ static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout) + return tx_timeout; + } + +-static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode) ++static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u32 tx_timeout) + { +- struct hns_roce_v2_priv *priv = hr_dev->priv; +- u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout); + u32 timeout = 0; + + do { +@@ -1299,8 +1297,9 @@ static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode) + } while (++timeout < tx_timeout); + } + +-static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, +- struct hns_roce_cmq_desc *desc, int num) ++static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev, ++ struct hns_roce_cmq_desc *desc, ++ int num, u32 tx_timeout) + { + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; +@@ -1309,8 +1308,6 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, + int ret; + int i; + +- spin_lock_bh(&csq->lock); +- + tail = csq->head; + + for (i = 0; i < num; i++) { +@@ -1324,22 +1321,17 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, + + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]); + +- hns_roce_wait_csq_done(hr_dev, le16_to_cpu(desc->opcode)); ++ hns_roce_wait_csq_done(hr_dev, tx_timeout); + if (hns_roce_cmq_csq_done(hr_dev)) { + ret = 0; + for (i = 0; i < num; i++) { + /* check the result of hardware write back */ +- desc[i] = csq->desc[tail++]; ++ 
desc_ret = le16_to_cpu(csq->desc[tail++].retval); + if (tail == csq->desc_num) + tail = 0; +- +- desc_ret = le16_to_cpu(desc[i].retval); + if (likely(desc_ret == CMD_EXEC_SUCCESS)) + continue; + +- dev_err_ratelimited(hr_dev->dev, +- "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n", +- desc->opcode, desc_ret); + ret = hns_roce_cmd_err_convert_errno(desc_ret); + } + } else { +@@ -1354,14 +1346,54 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, + ret = -EAGAIN; + } + +- spin_unlock_bh(&csq->lock); +- + if (ret) + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]); + + return ret; + } + ++static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, ++ struct hns_roce_cmq_desc *desc, int num) ++{ ++ struct hns_roce_v2_priv *priv = hr_dev->priv; ++ struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; ++ u16 opcode = le16_to_cpu(desc->opcode); ++ u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout); ++ u8 try_cnt = HNS_ROCE_OPC_POST_MB_TRY_CNT; ++ u32 rsv_tail; ++ int ret; ++ int i; ++ ++ while (try_cnt) { ++ try_cnt--; ++ ++ spin_lock_bh(&csq->lock); ++ rsv_tail = csq->head; ++ ret = __hns_roce_cmq_send_one(hr_dev, desc, num, tx_timeout); ++ if (opcode == HNS_ROCE_OPC_POST_MB && ret == -ETIME && ++ try_cnt) { ++ spin_unlock_bh(&csq->lock); ++ mdelay(HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC); ++ continue; ++ } ++ ++ for (i = 0; i < num; i++) { ++ desc[i] = csq->desc[rsv_tail++]; ++ if (rsv_tail == csq->desc_num) ++ rsv_tail = 0; ++ } ++ spin_unlock_bh(&csq->lock); ++ break; ++ } ++ ++ if (ret) ++ dev_err_ratelimited(hr_dev->dev, ++ "Cmdq IO error, opcode = 0x%x, return = %d.\n", ++ opcode, ret); ++ ++ return ret; ++} ++ + static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, int num) + { +diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +index cbdbc9edbce6e..91a5665465ffb 100644 +--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h ++++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +@@ -230,6 +230,8 @@ enum hns_roce_opcode_type { + }; + + #define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000 ++#define HNS_ROCE_OPC_POST_MB_TRY_CNT 8 ++#define HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC 5 + struct hns_roce_cmdq_tx_timeout_map { + u16 opcode; + u32 tx_timeout; +-- +2.39.5 + diff --git a/queue-6.12/rdma-mana_ib-allocate-page-aligned-doorbell-index.patch b/queue-6.12/rdma-mana_ib-allocate-page-aligned-doorbell-index.patch new file mode 100644 index 0000000000..ac8fd69e0d --- /dev/null +++ b/queue-6.12/rdma-mana_ib-allocate-page-aligned-doorbell-index.patch @@ -0,0 +1,39 @@ +From aaade52ae0a564519fbe2fc57cd8a248f2871090 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Feb 2025 02:30:05 -0800 +Subject: RDMA/mana_ib: Allocate PAGE aligned doorbell index + +From: Konstantin Taranov + +[ Upstream commit 29b7bb98234cc287cebef9bccf638c2e3f39be71 ] + +Allocate a PAGE aligned doorbell index to ensure each process gets a +separate PAGE sized doorbell area space remapped to it in mana_ib_mmap + +Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") +Signed-off-by: Shiraz Saleem +Signed-off-by: Konstantin Taranov +Link: https://patch.msgid.link/1738751405-15041-1-git-send-email-kotaranov@linux.microsoft.com +Reviewed-by: Long Li +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/mana/main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c +index 
67c2d43135a8a..457cea6d99095 100644
+--- a/drivers/infiniband/hw/mana/main.c
++++ b/drivers/infiniband/hw/mana/main.c
+@@ -174,7 +174,7 @@ static int mana_gd_allocate_doorbell_page(struct gdma_context *gc,
+
+ req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
+ req.num_resources = 1;
+- req.alignment = 1;
++ req.alignment = PAGE_SIZE / MANA_PAGE_SIZE;
+
+ /* Have GDMA start searching from 0 */
+ req.allocated_resources = 0;
+--
+2.39.5
+
diff --git a/queue-6.12/rdma-mlx5-fix-a-race-for-dmabuf-mr-which-can-lead-to.patch b/queue-6.12/rdma-mlx5-fix-a-race-for-dmabuf-mr-which-can-lead-to.patch
new file mode 100644
index 0000000000..062788b065
--- /dev/null
+++ b/queue-6.12/rdma-mlx5-fix-a-race-for-dmabuf-mr-which-can-lead-to.patch
@@ -0,0 +1,89 @@
+From feb1830aa63774200c61bb3614b759a83fad7db9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Mon, 3 Feb 2025 14:50:59 +0200
+Subject: RDMA/mlx5: Fix a race for DMABUF MR which can lead to CQE with error
+
+From: Yishai Hadas
+
+[ Upstream commit cc668a11e6ac8adb0e016711080d3f314722cc91 ]
+
+This patch addresses a potential race condition for a DMABUF MR that can
+result in a CQE with an error on the UMR QP.
+
+During the __mlx5_ib_dereg_mr() flow, the following sequence of calls
+occurs:
+mlx5_revoke_mr()
+mlx5r_umr_revoke_mr()
+mlx5r_umr_post_send_wait()
+At this point, the lkey is freed from the hardware's perspective.
+
+However, concurrently, mlx5_ib_dmabuf_invalidate_cb() might be triggered
+by another task attempting to invalidate the MR holding that freed lkey.
+
+Since the lkey has already been freed, this can lead to a CQE error,
+causing the UMR QP to enter an error state.
+
+To resolve this race condition, the dma_resv_lock() which was held as
+part of the mlx5_ib_dmabuf_invalidate_cb() is now also acquired as part
+of the mlx5_revoke_mr() scope.
+
+Upon a successful revoke, we set umem_dmabuf->private, which points to
+that MR, to NULL, preventing any further invalidation attempts on its
+lkey.
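A minimal userspace sketch of the locking pattern this fix relies on, using
pthreads and invented names (struct umem_dmabuf, revoke_mr() and friends here
are simplified stand-ins, not the driver's real definitions): the invalidate
callback and the revoke path serialize on the same reservation lock, and a
successful revoke clears the MR back-pointer so later invalidations become
no-ops.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct mr {
        bool lkey_valid;
};

struct umem_dmabuf {
        pthread_mutex_t resv;   /* stand-in for dma_resv_lock() */
        struct mr *private;     /* MR back-pointer, NULL once revoked */
};

/* invalidation path: skips MRs whose lkey was already revoked */
static void invalidate_cb(struct umem_dmabuf *u)
{
        pthread_mutex_lock(&u->resv);
        if (u->private)
                u->private->lkey_valid = false;
        pthread_mutex_unlock(&u->resv);
}

/* dereg path: revoke under the same lock, then hide the MR */
static int revoke_mr(struct umem_dmabuf *u, struct mr *mr)
{
        int ret = 0;

        pthread_mutex_lock(&u->resv);
        mr->lkey_valid = false;         /* the actual revoke work */
        if (ret == 0)
                u->private = NULL;      /* no further invalidations */
        pthread_mutex_unlock(&u->resv);
        return ret;
}

int main(void)
{
        struct umem_dmabuf u = { PTHREAD_MUTEX_INITIALIZER, NULL };
        struct mr mr = { true };

        u.private = &mr;
        revoke_mr(&u, &mr);
        invalidate_cb(&u);      /* harmless: back-pointer already cleared */
        return 0;
}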
+ +Fixes: e6fb246ccafb ("RDMA/mlx5: Consolidate MR destruction to mlx5_ib_dereg_mr()") +Signed-off-by: Yishai Hadas +Reviewed-by: Artemy Kovalyov +Link: https://patch.msgid.link/70617067abbfaa0c816a2544c922e7f4346def58.1738587016.git.leon@kernel.org +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/mlx5/mr.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c +index bb02b6adbf2c2..0a3cbb14e1839 100644 +--- a/drivers/infiniband/hw/mlx5/mr.c ++++ b/drivers/infiniband/hw/mlx5/mr.c +@@ -1550,7 +1550,7 @@ static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) + + dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); + +- if (!umem_dmabuf->sgt) ++ if (!umem_dmabuf->sgt || !mr) + return; + + mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); +@@ -2022,11 +2022,16 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; + bool is_odp = is_odp_mr(mr); ++ bool is_odp_dma_buf = is_dmabuf_mr(mr) && ++ !to_ib_umem_dmabuf(mr->umem)->pinned; + int ret = 0; + + if (is_odp) + mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); + ++ if (is_odp_dma_buf) ++ dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL); ++ + if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) { + ent = mr->mmkey.cache_ent; + /* upon storing to a clean temp entry - schedule its cleanup */ +@@ -2054,6 +2059,12 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) + mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); + } + ++ if (is_odp_dma_buf) { ++ if (!ret) ++ to_ib_umem_dmabuf(mr->umem)->private = NULL; ++ dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); ++ } ++ + return ret; + } + +-- +2.39.5 + diff --git a/queue-6.12/rdma-mlx5-fix-a-warn-during-dereg_mr-for-dm-type.patch b/queue-6.12/rdma-mlx5-fix-a-warn-during-dereg_mr-for-dm-type.patch new file mode 100644 index 0000000000..ff9c3854cf --- /dev/null +++ b/queue-6.12/rdma-mlx5-fix-a-warn-during-dereg_mr-for-dm-type.patch @@ -0,0 +1,106 @@ +From 4ad1da24099c029fa05f0f874128bdcf400112cf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 3 Feb 2025 14:51:43 +0200 +Subject: RDMA/mlx5: Fix a WARN during dereg_mr for DM type + +From: Yishai Hadas + +[ Upstream commit abc7b3f1f056d69a8f11d6dceecc0c9549ace770 ] + +Memory regions (MR) of type DM (device memory) do not have an associated +umem. + +In the __mlx5_ib_dereg_mr() -> mlx5_free_priv_descs() flow, the code +incorrectly takes the wrong branch, attempting to call +dma_unmap_single() on a DMA address that is not mapped. + +This results in a WARN [1], as shown below. + +The issue is resolved by properly accounting for the DM type and +ensuring the correct branch is selected in mlx5_free_priv_descs(). 
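The shape of that guard, as a self-contained sketch (types and names are
invented for illustration, not the mlx5 ones): cleanup must branch on how the
region was created, because a DM region never mapped a DMA address and so has
nothing to unmap.

#include <stdlib.h>

enum mr_type { MR_TYPE_REGULAR, MR_TYPE_DM };

struct fake_mr {
        enum mr_type type;
        void *descs;            /* only DMA-mapped for non-DM types */
};

static void free_priv_descs(struct fake_mr *mr)
{
        /* DM regions carry no mapping; unmapping one would be a bug */
        if (mr->type != MR_TYPE_DM && mr->descs) {
                /* dma_unmap_single() would sit here in the driver */
                free(mr->descs);
                mr->descs = NULL;
        }
}

int main(void)
{
        struct fake_mr dm = { MR_TYPE_DM, NULL };
        struct fake_mr reg = { MR_TYPE_REGULAR, malloc(64) };

        free_priv_descs(&dm);   /* safe no-op */
        free_priv_descs(&reg);  /* releases the mapping */
        return 0;
}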
+ +[1] +WARNING: CPU: 12 PID: 1346 at drivers/iommu/dma-iommu.c:1230 iommu_dma_unmap_page+0x79/0x90 +Modules linked in: ip6table_mangle ip6table_nat ip6table_filter ip6_tables iptable_mangle xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry ovelay rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core fuse mlx5_core +CPU: 12 UID: 0 PID: 1346 Comm: ibv_rc_pingpong Not tainted 6.12.0-rc7+ #1631 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 +RIP: 0010:iommu_dma_unmap_page+0x79/0x90 +Code: 2b 49 3b 29 72 26 49 3b 69 08 73 20 4d 89 f0 44 89 e9 4c 89 e2 48 89 ee 48 89 df 5b 5d 41 5c 41 5d 41 5e 41 5f e9 07 b8 88 ff <0f> 0b 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc 66 0f 1f 44 00 +RSP: 0018:ffffc90001913a10 EFLAGS: 00010246 +RAX: 0000000000000000 RBX: ffff88810194b0a8 RCX: 0000000000000000 +RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000001 +RBP: ffff88810194b0a8 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000 +R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000 +FS: 00007f537abdd740(0000) GS:ffff88885fb00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f537aeb8000 CR3: 000000010c248001 CR4: 0000000000372eb0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + +? __warn+0x84/0x190 +? iommu_dma_unmap_page+0x79/0x90 +? report_bug+0xf8/0x1c0 +? handle_bug+0x55/0x90 +? exc_invalid_op+0x13/0x60 +? asm_exc_invalid_op+0x16/0x20 +? iommu_dma_unmap_page+0x79/0x90 +dma_unmap_page_attrs+0xe6/0x290 +mlx5_free_priv_descs+0xb0/0xe0 [mlx5_ib] +__mlx5_ib_dereg_mr+0x37e/0x520 [mlx5_ib] +? _raw_spin_unlock_irq+0x24/0x40 +? wait_for_completion+0xfe/0x130 +? rdma_restrack_put+0x63/0xe0 [ib_core] +ib_dereg_mr_user+0x5f/0x120 [ib_core] +? lock_release+0xc6/0x280 +destroy_hw_idr_uobject+0x1d/0x60 [ib_uverbs] +uverbs_destroy_uobject+0x58/0x1d0 [ib_uverbs] +uobj_destroy+0x3f/0x70 [ib_uverbs] +ib_uverbs_cmd_verbs+0x3e4/0xbb0 [ib_uverbs] +? __pfx_uverbs_destroy_def_handler+0x10/0x10 [ib_uverbs] +? lock_acquire+0xc1/0x2f0 +? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs] +? ib_uverbs_ioctl+0x116/0x170 [ib_uverbs] +? lock_release+0xc6/0x280 +ib_uverbs_ioctl+0xe7/0x170 [ib_uverbs] +? 
ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs] +__x64_sys_ioctl+0x1b0/0xa70 +do_syscall_64+0x6b/0x140 +entry_SYSCALL_64_after_hwframe+0x76/0x7e +RIP: 0033:0x7f537adaf17b +Code: 0f 1e fa 48 8b 05 1d ad 0c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ed ac 0c 00 f7 d8 64 89 01 48 +RSP: 002b:00007ffff218f0b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 +RAX: ffffffffffffffda RBX: 00007ffff218f1d8 RCX: 00007f537adaf17b +RDX: 00007ffff218f1c0 RSI: 00000000c0181b01 RDI: 0000000000000003 +RBP: 00007ffff218f1a0 R08: 00007f537aa8d010 R09: 0000561ee2e4f270 +R10: 00007f537aace3a8 R11: 0000000000000246 R12: 00007ffff218f190 +R13: 000000000000001c R14: 0000561ee2e4d7c0 R15: 00007ffff218f450 + + +Fixes: f18ec4223117 ("RDMA/mlx5: Use a union inside mlx5_ib_mr") +Signed-off-by: Yishai Hadas +Link: https://patch.msgid.link/2039c22cfc3df02378747ba4d623a558b53fc263.1738587076.git.leon@kernel.org +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/mlx5/mr.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c +index 0a3cbb14e1839..753faa9ad06a8 100644 +--- a/drivers/infiniband/hw/mlx5/mr.c ++++ b/drivers/infiniband/hw/mlx5/mr.c +@@ -1935,7 +1935,8 @@ mlx5_alloc_priv_descs(struct ib_device *device, + static void + mlx5_free_priv_descs(struct mlx5_ib_mr *mr) + { +- if (!mr->umem && !mr->data_direct && mr->descs) { ++ if (!mr->umem && !mr->data_direct && ++ mr->ibmr.type != IB_MR_TYPE_DM && mr->descs) { + struct ib_device *device = mr->ibmr.device; + int size = mr->max_descs * mr->desc_size; + struct mlx5_ib_dev *dev = to_mdev(device); +-- +2.39.5 + diff --git a/queue-6.12/rdma-mlx5-fix-ah-static-rate-parsing.patch b/queue-6.12/rdma-mlx5-fix-ah-static-rate-parsing.patch new file mode 100644 index 0000000000..15b84b9eb8 --- /dev/null +++ b/queue-6.12/rdma-mlx5-fix-ah-static-rate-parsing.patch @@ -0,0 +1,84 @@ +From eada83afd116943095f2134aa2bef6bf9552ad1e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Feb 2025 13:32:39 +0200 +Subject: RDMA/mlx5: Fix AH static rate parsing + +From: Patrisious Haddad + +[ Upstream commit c534ffda781f44a1c6ac25ef6e0e444da38ca8af ] + +Previously static rate wasn't translated according to our PRM but simply +used the 4 lower bytes. + +Correctly translate static rate value passed in AH creation attribute +according to our PRM expected values. + +In addition change 800GB mapping to zero, which is the PRM +specified value. 
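A standalone sketch of the translation contract described above (the enum
values and lookup below are illustrative placeholders, not the PRM's real
encoding): "port current" and the top 800 Gbps rate both encode to 0,
out-of-range input is rejected, and everything else goes through an explicit
lookup instead of being passed through raw.

#include <errno.h>
#include <stdio.h>

enum ib_rate {                  /* illustrative values only */
        RATE_PORT_CURRENT = 0,
        RATE_2_5_GBPS = 2,
        RATE_800_GBPS = 24,
};

/* stand-in for the per-PRM lookup table */
static int prm_lookup(int rate)
{
        return rate + 5;
}

static int rate_to_hw(int rate)
{
        /* the PRM encodes "use the port's current rate" as 0 */
        if (rate == RATE_PORT_CURRENT || rate == RATE_800_GBPS)
                return 0;
        if (rate < RATE_2_5_GBPS || rate > RATE_800_GBPS)
                return -EINVAL;
        return prm_lookup(rate);
}

int main(void)
{
        printf("%d %d %d\n", rate_to_hw(RATE_PORT_CURRENT),
               rate_to_hw(RATE_800_GBPS), rate_to_hw(100));
        return 0;
}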
+
+Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
+Signed-off-by: Patrisious Haddad
+Reviewed-by: Maor Gottlieb
+Link: https://patch.msgid.link/18ef4cc5396caf80728341eb74738cd777596f60.1739187089.git.leon@kernel.org
+Signed-off-by: Leon Romanovsky
+Signed-off-by: Sasha Levin
+---
+ drivers/infiniband/hw/mlx5/ah.c | 3 ++-
+ drivers/infiniband/hw/mlx5/qp.c | 6 +++---
+ drivers/infiniband/hw/mlx5/qp.h | 1 +
+ 3 files changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
+index 505bc47fd575d..99036afb3aef0 100644
+--- a/drivers/infiniband/hw/mlx5/ah.c
++++ b/drivers/infiniband/hw/mlx5/ah.c
+@@ -67,7 +67,8 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
+ ah->av.tclass = grh->traffic_class;
+ }
+
+- ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);
++ ah->av.stat_rate_sl =
++ (mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr)) << 4);
+
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ if (init_attr->xmit_slave)
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index 0d8a8b109a751..ded139b4e87aa 100644
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -3420,11 +3420,11 @@ static int ib_to_mlx5_rate_map(u8 rate)
+ return 0;
+ }
+
+-static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
++int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate)
+ {
+ u32 stat_rate_support;
+
+- if (rate == IB_RATE_PORT_CURRENT)
++ if (rate == IB_RATE_PORT_CURRENT || rate == IB_RATE_800_GBPS)
+ return 0;
+
+ if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_800_GBPS)
+@@ -3569,7 +3569,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ sizeof(grh->dgid.raw));
+ }
+
+- err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah));
++ err = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah));
+ if (err < 0)
+ return err;
+ MLX5_SET(ads, path, stat_rate, err);
+diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h
+index b6ee7c3ee1ca1..2530e7730635f 100644
+--- a/drivers/infiniband/hw/mlx5/qp.h
++++ b/drivers/infiniband/hw/mlx5/qp.h
+@@ -56,4 +56,5 @@ int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn);
+ int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
+ int mlx5_ib_qp_event_init(void);
+ void mlx5_ib_qp_event_cleanup(void);
++int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate);
+ #endif /* _MLX5_IB_QP_H */
+--
+2.39.5
+
diff --git a/queue-6.12/rdma-mlx5-fix-bind-qp-error-cleanup-flow.patch b/queue-6.12/rdma-mlx5-fix-bind-qp-error-cleanup-flow.patch
new file mode 100644
index 0000000000..0cb4a94f7a
--- /dev/null
+++ b/queue-6.12/rdma-mlx5-fix-bind-qp-error-cleanup-flow.patch
@@ -0,0 +1,63 @@
+From bcb7e88c0be250e3ebe2e2236d75037612df43f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Thu, 20 Feb 2025 08:47:10 +0200
+Subject: RDMA/mlx5: Fix bind QP error cleanup flow
+
+From: Patrisious Haddad
+
+[ Upstream commit e1a0bdbdfdf08428f0ede5ae49c7f4139ac73ef5 ]
+
+When there is a failure during bind QP, the cleanup flow destroys the
+counter regardless of whether this call created it. That is problematic
+because a counter created elsewhere could still be in use.
+
+Fix that by destroying the counter only if it was created during this
+call.
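The ownership rule behind this fix, reduced to a generic sketch with invented
names: remember whether this call allocated the counter, and on failure tear
down only what it allocated, so a counter bound in from elsewhere survives the
error path.

#include <stdbool.h>

struct counter { int id; };

static int next_id = 1;

static int alloc_counter(struct counter *c)
{
        c->id = next_id++;
        return 0;
}

static void dealloc_counter(struct counter *c)
{
        c->id = 0;
}

static int set_counter(struct counter *c)
{
        (void)c;
        return -1;              /* force the error path for the demo */
}

static int bind_counter(struct counter *c)
{
        bool new = false;
        int err;

        if (!c->id) {
                err = alloc_counter(c);
                if (err)
                        return err;
                new = true;
        }

        err = set_counter(c);
        if (!err)
                return 0;

        if (new)                /* only undo our own allocation */
                dealloc_counter(c);
        return err;
}

int main(void)
{
        struct counter mine = { 0 };    /* allocated here: cleaned up */
        struct counter theirs = { 42 }; /* pre-existing: left intact */

        bind_counter(&mine);
        bind_counter(&theirs);
        return theirs.id == 42 ? 0 : 1;
}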
+ +Fixes: 45842fc627c7 ("IB/mlx5: Support statistic q counter configuration") +Signed-off-by: Patrisious Haddad +Reviewed-by: Mark Zhang +Link: https://patch.msgid.link/25dfefddb0ebefa668c32e06a94d84e3216257cf.1740033937.git.leon@kernel.org +Reviewed-by: Zhu Yanjun +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/mlx5/counters.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c +index 4f6c1968a2ee3..81cfa74147a18 100644 +--- a/drivers/infiniband/hw/mlx5/counters.c ++++ b/drivers/infiniband/hw/mlx5/counters.c +@@ -546,6 +546,7 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, + struct ib_qp *qp) + { + struct mlx5_ib_dev *dev = to_mdev(qp->device); ++ bool new = false; + int err; + + if (!counter->id) { +@@ -560,6 +561,7 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, + return err; + counter->id = + MLX5_GET(alloc_q_counter_out, out, counter_set_id); ++ new = true; + } + + err = mlx5_ib_qp_set_counter(qp, counter); +@@ -569,8 +571,10 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, + return 0; + + fail_set_counter: +- mlx5_ib_counter_dealloc(counter); +- counter->id = 0; ++ if (new) { ++ mlx5_ib_counter_dealloc(counter); ++ counter->id = 0; ++ } + + return err; + } +-- +2.39.5 + diff --git a/queue-6.12/rdma-mlx5-fix-implicit-odp-hang-on-parent-deregistra.patch b/queue-6.12/rdma-mlx5-fix-implicit-odp-hang-on-parent-deregistra.patch new file mode 100644 index 0000000000..84f612cd0b --- /dev/null +++ b/queue-6.12/rdma-mlx5-fix-implicit-odp-hang-on-parent-deregistra.patch @@ -0,0 +1,84 @@ +From 68c7dc6b2e521b356912667e2dd39da8ce999a8a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Feb 2025 13:31:11 +0200 +Subject: RDMA/mlx5: Fix implicit ODP hang on parent deregistration + +From: Yishai Hadas + +[ Upstream commit 3d8c6f26893d55fab218ad086719de1fc9bb86ba ] + +Fix the destroy_unused_implicit_child_mr() to prevent hanging during +parent deregistration as of below [1]. + +Upon entering destroy_unused_implicit_child_mr(), the reference count +for the implicit MR parent is incremented using: +refcount_inc_not_zero(). + +A corresponding decrement must be performed if +free_implicit_child_mr_work() is not called. + +The code has been updated to properly manage the reference count that +was incremented. + +[1] +INFO: task python3:2157 blocked for more than 120 seconds. +Not tainted 6.12.0-rc7+ #1633 +"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. +task:python3 state:D stack:0 pid:2157 tgid:2157 ppid:1685 flags:0x00000000 +Call Trace: + +__schedule+0x420/0xd30 +schedule+0x47/0x130 +__mlx5_ib_dereg_mr+0x379/0x5d0 [mlx5_ib] +? __pfx_autoremove_wake_function+0x10/0x10 +ib_dereg_mr_user+0x5f/0x120 [ib_core] +? lock_release+0xc6/0x280 +destroy_hw_idr_uobject+0x1d/0x60 [ib_uverbs] +uverbs_destroy_uobject+0x58/0x1d0 [ib_uverbs] +uobj_destroy+0x3f/0x70 [ib_uverbs] +ib_uverbs_cmd_verbs+0x3e4/0xbb0 [ib_uverbs] +? __pfx_uverbs_destroy_def_handler+0x10/0x10 [ib_uverbs] +? lock_acquire+0xc1/0x2f0 +? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs] +? ib_uverbs_ioctl+0x116/0x170 [ib_uverbs] +? lock_release+0xc6/0x280 +ib_uverbs_ioctl+0xe7/0x170 [ib_uverbs] +? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs] + __x64_sys_ioctl+0x1b0/0xa70 +? 
kmem_cache_free+0x221/0x400 +do_syscall_64+0x6b/0x140 +entry_SYSCALL_64_after_hwframe+0x76/0x7e +RIP: 0033:0x7f20f21f017b +RSP: 002b:00007ffcfc4a77c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 +RAX: ffffffffffffffda RBX: 00007ffcfc4a78d8 RCX: 00007f20f21f017b +RDX: 00007ffcfc4a78c0 RSI: 00000000c0181b01 RDI: 0000000000000003 +RBP: 00007ffcfc4a78a0 R08: 000056147d125190 R09: 00007f20f1f14c60 +R10: 0000000000000001 R11: 0000000000000246 R12: 00007ffcfc4a7890 +R13: 000000000000001c R14: 000056147d100fc0 R15: 00007f20e365c9d0 + + +Fixes: d3d930411ce3 ("RDMA/mlx5: Fix implicit ODP use after free") +Signed-off-by: Yishai Hadas +Reviewed-by: Artemy Kovalyov +Link: https://patch.msgid.link/80f2fcd19952dfa7d9981d93fd6359b4471f8278.1739186929.git.leon@kernel.org +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/mlx5/odp.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c +index 1d3bf56157702..b4e2a6f9cb9c3 100644 +--- a/drivers/infiniband/hw/mlx5/odp.c ++++ b/drivers/infiniband/hw/mlx5/odp.c +@@ -242,6 +242,7 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) + if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_KERNEL) != + mr) { + xa_unlock(&imr->implicit_children); ++ mlx5r_deref_odp_mkey(&imr->mmkey); + return; + } + +-- +2.39.5 + diff --git a/queue-6.12/rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch b/queue-6.12/rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch new file mode 100644 index 0000000000..438659b97d --- /dev/null +++ b/queue-6.12/rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch @@ -0,0 +1,209 @@ +From 28fb7fae65ea7380048859088afbf9d61842398c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 19 Jan 2025 14:36:13 +0200 +Subject: RDMA/mlx5: Fix the recovery flow of the UMR QP + +From: Yishai Hadas + +[ Upstream commit d97505baea64d93538b16baf14ce7b8c1fbad746 ] + +This patch addresses an issue in the recovery flow of the UMR QP, +ensuring tasks do not get stuck, as highlighted by the call trace [1]. + +During recovery, before transitioning the QP to the RESET state, the +software must wait for all outstanding WRs to complete. + +Failing to do so can cause the firmware to skip sending some flushed +CQEs with errors and simply discard them upon the RESET, as per the IB +specification. + +This race condition can result in lost CQEs and tasks becoming stuck. + +To resolve this, the patch sends a final WR which serves only as a +barrier before moving the QP state to RESET. + +Once a CQE is received for that final WR, it guarantees that no +outstanding WRs remain, making it safe to transition the QP to RESET and +subsequently back to RTS, restoring proper functionality. + +Note: +For the barrier WR, we simply reuse the failed and ready WR. +Since the QP is in an error state, it will only receive +IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier we don't +care about its status. + +[1] +INFO: task rdma_resource_l:1922 blocked for more than 120 seconds. +Tainted: G W 6.12.0-rc7+ #1626 +"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. +task:rdma_resource_l state:D stack:0 pid:1922 tgid:1922 ppid:1369 + flags:0x00004004 +Call Trace: + +__schedule+0x420/0xd30 +schedule+0x47/0x130 +schedule_timeout+0x280/0x300 +? mark_held_locks+0x48/0x80 +? lockdep_hardirqs_on_prepare+0xe5/0x1a0 +wait_for_completion+0x75/0x130 +mlx5r_umr_post_send_wait+0x3c2/0x5b0 [mlx5_ib] +? 
__pfx_mlx5r_umr_done+0x10/0x10 [mlx5_ib] +mlx5r_umr_revoke_mr+0x93/0xc0 [mlx5_ib] +__mlx5_ib_dereg_mr+0x299/0x520 [mlx5_ib] +? _raw_spin_unlock_irq+0x24/0x40 +? wait_for_completion+0xfe/0x130 +? rdma_restrack_put+0x63/0xe0 [ib_core] +ib_dereg_mr_user+0x5f/0x120 [ib_core] +? lock_release+0xc6/0x280 +destroy_hw_idr_uobject+0x1d/0x60 [ib_uverbs] +uverbs_destroy_uobject+0x58/0x1d0 [ib_uverbs] +uobj_destroy+0x3f/0x70 [ib_uverbs] +ib_uverbs_cmd_verbs+0x3e4/0xbb0 [ib_uverbs] +? __pfx_uverbs_destroy_def_handler+0x10/0x10 [ib_uverbs] +? __lock_acquire+0x64e/0x2080 +? mark_held_locks+0x48/0x80 +? find_held_lock+0x2d/0xa0 +? lock_acquire+0xc1/0x2f0 +? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs] +? __fget_files+0xc3/0x1b0 +ib_uverbs_ioctl+0xe7/0x170 [ib_uverbs] +? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs] +__x64_sys_ioctl+0x1b0/0xa70 +do_syscall_64+0x6b/0x140 +entry_SYSCALL_64_after_hwframe+0x76/0x7e +RIP: 0033:0x7f99c918b17b +RSP: 002b:00007ffc766d0468 EFLAGS: 00000246 ORIG_RAX: + 0000000000000010 +RAX: ffffffffffffffda RBX: 00007ffc766d0578 RCX: + 00007f99c918b17b +RDX: 00007ffc766d0560 RSI: 00000000c0181b01 RDI: + 0000000000000003 +RBP: 00007ffc766d0540 R08: 00007f99c8f99010 R09: + 000000000000bd7e +R10: 00007f99c94c1c70 R11: 0000000000000246 R12: + 00007ffc766d0530 +R13: 000000000000001c R14: 0000000040246a80 R15: + 0000000000000000 + + +Fixes: 158e71bb69e3 ("RDMA/mlx5: Add a umr recovery flow") +Signed-off-by: Yishai Hadas +Reviewed-by: Michael Guralnik +Link: https://patch.msgid.link/27b51b92ec42dfb09d8096fcbd51878f397ce6ec.1737290141.git.leon@kernel.org +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/mlx5/umr.c | 83 +++++++++++++++++++++----------- + 1 file changed, 56 insertions(+), 27 deletions(-) + +diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c +index 887fd6fa3ba93..793f3c5c4d012 100644 +--- a/drivers/infiniband/hw/mlx5/umr.c ++++ b/drivers/infiniband/hw/mlx5/umr.c +@@ -231,30 +231,6 @@ void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev) + ib_dealloc_pd(dev->umrc.pd); + } + +-static int mlx5r_umr_recover(struct mlx5_ib_dev *dev) +-{ +- struct umr_common *umrc = &dev->umrc; +- struct ib_qp_attr attr; +- int err; +- +- attr.qp_state = IB_QPS_RESET; +- err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); +- if (err) { +- mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); +- goto err; +- } +- +- err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); +- if (err) +- goto err; +- +- umrc->state = MLX5_UMR_STATE_ACTIVE; +- return 0; +- +-err: +- umrc->state = MLX5_UMR_STATE_ERR; +- return err; +-} + + static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, + struct mlx5r_umr_wqe *wqe, bool with_data) +@@ -302,6 +278,61 @@ static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, + return err; + } + ++static int mlx5r_umr_recover(struct mlx5_ib_dev *dev, u32 mkey, ++ struct mlx5r_umr_context *umr_context, ++ struct mlx5r_umr_wqe *wqe, bool with_data) ++{ ++ struct umr_common *umrc = &dev->umrc; ++ struct ib_qp_attr attr; ++ int err; ++ ++ mutex_lock(&umrc->lock); ++ /* Preventing any further WRs to be sent now */ ++ if (umrc->state != MLX5_UMR_STATE_RECOVER) { ++ mlx5_ib_warn(dev, "UMR recovery encountered an unexpected state=%d\n", ++ umrc->state); ++ umrc->state = MLX5_UMR_STATE_RECOVER; ++ } ++ mutex_unlock(&umrc->lock); ++ ++ /* Sending a final/barrier WR (the failed one) and wait for its completion. ++ * This will ensure that all the previous WRs got a completion before ++ * we set the QP state to RESET. 
++ */ ++ err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe, wqe, ++ with_data); ++ if (err) { ++ mlx5_ib_warn(dev, "UMR recovery post send failed, err %d\n", err); ++ goto err; ++ } ++ ++ /* Since the QP is in an error state, it will only receive ++ * IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier ++ * we don't care about its status. ++ */ ++ wait_for_completion(&umr_context->done); ++ ++ attr.qp_state = IB_QPS_RESET; ++ err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); ++ if (err) { ++ mlx5_ib_warn(dev, "Couldn't modify UMR QP to RESET, err=%d\n", err); ++ goto err; ++ } ++ ++ err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); ++ if (err) { ++ mlx5_ib_warn(dev, "Couldn't modify UMR QP to RTS, err=%d\n", err); ++ goto err; ++ } ++ ++ umrc->state = MLX5_UMR_STATE_ACTIVE; ++ return 0; ++ ++err: ++ umrc->state = MLX5_UMR_STATE_ERR; ++ return err; ++} ++ + static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc) + { + struct mlx5_ib_umr_context *context = +@@ -366,9 +397,7 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey, + mlx5_ib_warn(dev, + "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n", + umr_context.status, mkey); +- mutex_lock(&umrc->lock); +- err = mlx5r_umr_recover(dev); +- mutex_unlock(&umrc->lock); ++ err = mlx5r_umr_recover(dev, mkey, &umr_context, wqe, with_data); + if (err) + mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n", + err); +-- +2.39.5 + diff --git a/queue-6.12/scsi-core-clear-driver-private-data-when-retrying-re.patch b/queue-6.12/scsi-core-clear-driver-private-data-when-retrying-re.patch new file mode 100644 index 0000000000..cea3f830fe --- /dev/null +++ b/queue-6.12/scsi-core-clear-driver-private-data-when-retrying-re.patch @@ -0,0 +1,68 @@ +From ef8c8d12b8d5bdcabc0dd000b1df1c1afd05ed26 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Feb 2025 10:16:28 +0800 +Subject: scsi: core: Clear driver private data when retrying request + +From: Ye Bin + +[ Upstream commit dce5c4afd035e8090a26e5d776b1682c0e649683 ] + +After commit 1bad6c4a57ef ("scsi: zero per-cmd private driver data for each +MQ I/O"), the xen-scsifront/virtio_scsi/snic drivers all removed code that +explicitly zeroed driver-private command data. + +In combination with commit 464a00c9e0ad ("scsi: core: Kill DRIVER_SENSE"), +after virtio_scsi performs a capacity expansion, the first request will +return a unit attention to indicate that the capacity has changed. And then +the original command is retried. As driver-private command data was not +cleared, the request would return UA again and eventually time out and fail. + +Zero driver-private command data when a request is retried. + +Fixes: f7de50da1479 ("scsi: xen-scsifront: Remove code that zeroes driver-private command data") +Fixes: c2bb87318baa ("scsi: virtio_scsi: Remove code that zeroes driver-private command data") +Fixes: c3006a926468 ("scsi: snic: Remove code that zeroes driver-private command data") +Signed-off-by: Ye Bin +Reviewed-by: Bart Van Assche +Link: https://lore.kernel.org/r/20250217021628.2929248-1-yebin@huaweicloud.com +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Sasha Levin +--- + drivers/scsi/scsi_lib.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c +index c9dde1ac9523e..3023b07dc483b 100644 +--- a/drivers/scsi/scsi_lib.c ++++ b/drivers/scsi/scsi_lib.c +@@ -1653,13 +1653,6 @@ static blk_status_t scsi_prepare_cmd(struct request *req) + if (in_flight) + __set_bit(SCMD_STATE_INFLIGHT, &cmd->state); + +- /* +- * Only clear the driver-private command data if the LLD does not supply +- * a function to initialize that data. +- */ +- if (!shost->hostt->init_cmd_priv) +- memset(cmd + 1, 0, shost->hostt->cmd_size); +- + cmd->prot_op = SCSI_PROT_NORMAL; + if (blk_rq_bytes(req)) + cmd->sc_data_direction = rq_dma_dir(req); +@@ -1826,6 +1819,13 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, + if (!scsi_host_queue_ready(q, shost, sdev, cmd)) + goto out_dec_target_busy; + ++ /* ++ * Only clear the driver-private command data if the LLD does not supply ++ * a function to initialize that data. ++ */ ++ if (shost->hostt->cmd_size && !shost->hostt->init_cmd_priv) ++ memset(cmd + 1, 0, shost->hostt->cmd_size); ++ + if (!(req->rq_flags & RQF_DONTPREP)) { + ret = scsi_prepare_cmd(req); + if (ret != BLK_STS_OK) +-- +2.39.5 + diff --git a/queue-6.12/scsi-ufs-core-fix-ufshcd_is_ufs_dev_busy-and-ufshcd_.patch b/queue-6.12/scsi-ufs-core-fix-ufshcd_is_ufs_dev_busy-and-ufshcd_.patch new file mode 100644 index 0000000000..d88ee814fe --- /dev/null +++ b/queue-6.12/scsi-ufs-core-fix-ufshcd_is_ufs_dev_busy-and-ufshcd_.patch @@ -0,0 +1,62 @@ +From 626cfc48fd223948930ace2730e6a4a0019176ed Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Feb 2025 14:43:44 -0800 +Subject: scsi: ufs: core: Fix ufshcd_is_ufs_dev_busy() and + ufshcd_eh_timed_out() + +From: Bart Van Assche + +[ Upstream commit 4fa382be430421e1445f9c95c4dc9b7e0949ae8a ] + +ufshcd_is_ufs_dev_busy(), ufshcd_print_host_state() and +ufshcd_eh_timed_out() are used in both modes (legacy mode and MCQ mode). +hba->outstanding_reqs only represents the outstanding requests in legacy +mode. Hence, change hba->outstanding_reqs into scsi_host_busy(hba->host) in +these functions. + +Fixes: eacb139b77ff ("scsi: ufs: core: mcq: Enable multi-circular queue") +Signed-off-by: Bart Van Assche +Link: https://lore.kernel.org/r/20250214224352.3025151-1-bvanassche@acm.org +Reviewed-by: Peter Wang +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Sasha Levin +--- + drivers/ufs/core/ufshcd.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c +index 67410c4cebee6..ff4878cf882be 100644 +--- a/drivers/ufs/core/ufshcd.c ++++ b/drivers/ufs/core/ufshcd.c +@@ -266,7 +266,7 @@ static bool ufshcd_has_pending_tasks(struct ufs_hba *hba) + + static bool ufshcd_is_ufs_dev_busy(struct ufs_hba *hba) + { +- return hba->outstanding_reqs || ufshcd_has_pending_tasks(hba); ++ return scsi_host_busy(hba->host) || ufshcd_has_pending_tasks(hba); + } + + static const struct ufs_dev_quirk ufs_fixups[] = { +@@ -639,8 +639,8 @@ static void ufshcd_print_host_state(struct ufs_hba *hba) + const struct scsi_device *sdev_ufs = hba->ufs_device_wlun; + + dev_err(hba->dev, "UFS Host state=%d\n", hba->ufshcd_state); +- dev_err(hba->dev, "outstanding reqs=0x%lx tasks=0x%lx\n", +- hba->outstanding_reqs, hba->outstanding_tasks); ++ dev_err(hba->dev, "%d outstanding reqs, tasks=0x%lx\n", ++ scsi_host_busy(hba->host), hba->outstanding_tasks); + dev_err(hba->dev, "saved_err=0x%x, saved_uic_err=0x%x\n", + hba->saved_err, hba->saved_uic_err); + dev_err(hba->dev, "Device power mode=%d, UIC link state=%d\n", +@@ -8975,7 +8975,7 @@ static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd) + dev_info(hba->dev, "%s() finished; outstanding_tasks = %#lx.\n", + __func__, hba->outstanding_tasks); + +- return hba->outstanding_reqs ? SCSI_EH_RESET_TIMER : SCSI_EH_DONE; ++ return scsi_host_busy(hba->host) ? SCSI_EH_RESET_TIMER : SCSI_EH_DONE; + } + + static const struct attribute_group *ufshcd_driver_groups[] = { +-- +2.39.5 + diff --git a/queue-6.12/scsi-ufs-core-set-default-runtime-system-pm-levels-b.patch b/queue-6.12/scsi-ufs-core-set-default-runtime-system-pm-levels-b.patch new file mode 100644 index 0000000000..6a058b1f82 --- /dev/null +++ b/queue-6.12/scsi-ufs-core-set-default-runtime-system-pm-levels-b.patch @@ -0,0 +1,83 @@ +From 14920c26600273a6826591fdce0db8fd288a2b35 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Feb 2025 16:20:47 +0530 +Subject: scsi: ufs: core: Set default runtime/system PM levels before + ufshcd_hba_init() + +From: Manivannan Sadhasivam + +[ Upstream commit fe06b7c07f3fbcce2a2ca6f7b0d543b5699ea00f ] + +Commit bb9850704c04 ("scsi: ufs: core: Honor runtime/system PM levels if +set by host controller drivers") introduced the check for setting default +PM levels only if the levels are uninitialized by the host controller +drivers. But it missed the fact that the levels could be initialized to 0 +(UFS_PM_LVL_0) on purpose by the controller drivers. Even though none of +the drivers are doing so now, the logic should be fixed irrespectively. + +So set the default levels unconditionally before calling ufshcd_hba_init() +API which initializes the controller drivers. It ensures that the +controller drivers could override the default levels if required. + +Fixes: bb9850704c04 ("scsi: ufs: core: Honor runtime/system PM levels if set by host controller drivers") +Reported-by: Bao D. Nguyen +Signed-off-by: Manivannan Sadhasivam +Link: https://lore.kernel.org/r/20250219105047.49932-1-manivannan.sadhasivam@linaro.org +Reviewed-by: Bart Van Assche +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Sasha Levin +--- + drivers/ufs/core/ufshcd.c | 30 +++++++++++++++--------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c +index ff4878cf882be..a3e95ef5eda82 100644 +--- a/drivers/ufs/core/ufshcd.c ++++ b/drivers/ufs/core/ufshcd.c +@@ -10457,6 +10457,21 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) + */ + spin_lock_init(&hba->clk_gating.lock); + ++ /* ++ * Set the default power management level for runtime and system PM. ++ * Host controller drivers can override them in their ++ * 'ufs_hba_variant_ops::init' callback. ++ * ++ * Default power saving mode is to keep UFS link in Hibern8 state ++ * and UFS device in sleep state. ++ */ ++ hba->rpm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( ++ UFS_SLEEP_PWR_MODE, ++ UIC_LINK_HIBERN8_STATE); ++ hba->spm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( ++ UFS_SLEEP_PWR_MODE, ++ UIC_LINK_HIBERN8_STATE); ++ + err = ufshcd_hba_init(hba); + if (err) + goto out_error; +@@ -10606,21 +10621,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) + goto free_tmf_queue; + } + +- /* +- * Set the default power management level for runtime and system PM if +- * not set by the host controller drivers. +- * Default power saving mode is to keep UFS link in Hibern8 state +- * and UFS device in sleep state. +- */ +- if (!hba->rpm_lvl) +- hba->rpm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( +- UFS_SLEEP_PWR_MODE, +- UIC_LINK_HIBERN8_STATE); +- if (!hba->spm_lvl) +- hba->spm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( +- UFS_SLEEP_PWR_MODE, +- UIC_LINK_HIBERN8_STATE); +- + INIT_DELAYED_WORK(&hba->rpm_dev_flush_recheck_work, ufshcd_rpm_dev_flush_recheck_work); + INIT_DELAYED_WORK(&hba->ufs_rtc_update_work, ufshcd_rtc_work); + +-- +2.39.5 + diff --git a/queue-6.12/series b/queue-6.12/series new file mode 100644 index 0000000000..beb22721e0 --- /dev/null +++ b/queue-6.12/series @@ -0,0 +1,27 @@ +rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch +ib-mlx5-set-and-get-correct-qp_num-for-a-dct-qp.patch +rdma-mlx5-fix-a-race-for-dmabuf-mr-which-can-lead-to.patch +rdma-mlx5-fix-a-warn-during-dereg_mr-for-dm-type.patch +rdma-mana_ib-allocate-page-aligned-doorbell-index.patch +rdma-hns-fix-mbox-timing-out-by-adding-retry-mechani.patch +rdma-bnxt_re-fail-probe-early-when-not-enough-msi-x-.patch +rdma-bnxt_re-refactor-nq-allocation.patch +rdma-bnxt_re-cache-msix-info-to-a-local-structure.patch +rdma-bnxt_re-add-sanity-checks-on-rdev-validity.patch +rdma-bnxt_re-allocate-dev_attr-information-dynamical.patch +rdma-bnxt_re-fix-the-statistics-for-gen-p7-vf.patch +landlock-fix-non-tcp-sockets-restriction.patch +scsi-ufs-core-fix-ufshcd_is_ufs_dev_busy-and-ufshcd_.patch +ovl-fix-uaf-in-ovl_dentry_update_reval-by-moving-dpu.patch +nfs-o_direct-writes-must-check-and-adjust-the-file-l.patch +nfs-adjust-delegated-timestamps-for-o_direct-reads-a.patch +sunrpc-prevent-looping-due-to-rpc_signal_task-races.patch +nfsv4-fix-a-deadlock-when-recovering-state-on-a-sill.patch +sunrpc-handle-etimedout-return-from-tlshd.patch +rdma-mlx5-fix-implicit-odp-hang-on-parent-deregistra.patch +rdma-mlx5-fix-ah-static-rate-parsing.patch +scsi-core-clear-driver-private-data-when-retrying-re.patch +scsi-ufs-core-set-default-runtime-system-pm-levels-b.patch +rdma-mlx5-fix-bind-qp-error-cleanup-flow.patch +rdma-bnxt_re-fix-the-page-details-for-the-srq-create.patch +sunrpc-suppress-warnings-for-unused-procfs-functions.patch 
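A note on the UFS default-PM-level patch above: the fix is purely an ordering
change, sketched generically below with invented fields. Assign the defaults
unconditionally before the driver's init hook runs, so the hook can override
them with any value, including 0, without a later "if (!level)" guard
clobbering it.

struct ctrl {
        int rpm_lvl;
        int spm_lvl;
};

/* a variant driver that deliberately picks level 0 */
static void variant_init(struct ctrl *c)
{
        c->rpm_lvl = 0;
}

static void probe(struct ctrl *c)
{
        /* defaults first, unconditionally ... */
        c->rpm_lvl = 3;
        c->spm_lvl = 3;

        /* ... then the init hook, free to override with anything */
        variant_init(c);
}

int main(void)
{
        struct ctrl c;

        probe(&c);
        /* rpm_lvl stays 0: no "if (!rpm_lvl) set default" runs later */
        return c.rpm_lvl;
}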
diff --git a/queue-6.12/sunrpc-handle-etimedout-return-from-tlshd.patch b/queue-6.12/sunrpc-handle-etimedout-return-from-tlshd.patch new file mode 100644 index 0000000000..82426f0cba --- /dev/null +++ b/queue-6.12/sunrpc-handle-etimedout-return-from-tlshd.patch @@ -0,0 +1,48 @@ +From d8ffb28d02f3ac345d09f920e7725cfaf51cbb9d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Feb 2025 12:31:57 -0500 +Subject: SUNRPC: Handle -ETIMEDOUT return from tlshd + +From: Benjamin Coddington + +[ Upstream commit 7a2f6f7687c5f7083a35317cddec5ad9fa491443 ] + +If the TLS handshake attempt returns -ETIMEDOUT, we currently translate +that error into -EACCES. This becomes problematic for cases where the RPC +layer is attempting to re-connect in paths that don't resonably handle +-EACCES, for example: writeback. The RPC layer can handle -ETIMEDOUT quite +well, however - so if the handshake returns this error let's just pass it +along. + +Fixes: 75eb6af7acdf ("SUNRPC: Add a TCP-with-TLS RPC transport class") +Signed-off-by: Benjamin Coddington +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + net/sunrpc/xprtsock.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c +index b69e6290acfab..171ad4e2523f1 100644 +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -2580,7 +2580,15 @@ static void xs_tls_handshake_done(void *data, int status, key_serial_t peerid) + struct sock_xprt *lower_transport = + container_of(lower_xprt, struct sock_xprt, xprt); + +- lower_transport->xprt_err = status ? -EACCES : 0; ++ switch (status) { ++ case 0: ++ case -EACCES: ++ case -ETIMEDOUT: ++ lower_transport->xprt_err = status; ++ break; ++ default: ++ lower_transport->xprt_err = -EACCES; ++ } + complete(&lower_transport->handshake_done); + xprt_put(lower_xprt); + } +-- +2.39.5 + diff --git a/queue-6.12/sunrpc-prevent-looping-due-to-rpc_signal_task-races.patch b/queue-6.12/sunrpc-prevent-looping-due-to-rpc_signal_task-races.patch new file mode 100644 index 0000000000..b83525ba6b --- /dev/null +++ b/queue-6.12/sunrpc-prevent-looping-due-to-rpc_signal_task-races.patch @@ -0,0 +1,79 @@ +From 52ae254590c723e44592fa34ed0312814759b72f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 1 Feb 2025 15:00:02 -0500 +Subject: SUNRPC: Prevent looping due to rpc_signal_task() races + +From: Trond Myklebust + +[ Upstream commit 5bbd6e863b15a85221e49b9bdb2d5d8f0bb91f3d ] + +If rpc_signal_task() is called while a task is in an rpc_call_done() +callback function, and the latter calls rpc_restart_call(), the task can +end up looping due to the RPC_TASK_SIGNALLED flag being set without the +tk_rpc_status being set. +Removing the redundant mechanism for signalling the task fixes the +looping behaviour. 
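Below, a compressed illustration of the race class this removes, in portable
C11 with invented names: once "signalled" is recorded in two places (a flag
bit and a status word), a restarted task can observe one without the other
and spin; collapsing the state into a single atomic status word closes the
window.

#include <stdatomic.h>
#include <stdio.h>

#define ERESTARTSYS 512

/* single source of truth: 0 = running, -ERESTARTSYS = signalled */
static atomic_int tk_rpc_status;

static void signal_task(void)
{
        int expected = 0;

        /* record the signal exactly once; no separate flag bit to drift */
        atomic_compare_exchange_strong(&tk_rpc_status, &expected,
                                       -ERESTARTSYS);
}

static int task_signalled(void)
{
        return atomic_load(&tk_rpc_status) == -ERESTARTSYS;
}

int main(void)
{
        signal_task();
        printf("signalled=%d\n", task_signalled());
        return 0;
}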
+ +Reported-by: Li Lingfeng +Fixes: 39494194f93b ("SUNRPC: Fix races with rpc_killall_tasks()") +Signed-off-by: Trond Myklebust +Reviewed-by: Jeff Layton +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + include/linux/sunrpc/sched.h | 3 +-- + include/trace/events/sunrpc.h | 3 +-- + net/sunrpc/sched.c | 2 -- + 3 files changed, 2 insertions(+), 6 deletions(-) + +diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h +index fec1e8a1570c3..eac57914dcf32 100644 +--- a/include/linux/sunrpc/sched.h ++++ b/include/linux/sunrpc/sched.h +@@ -158,7 +158,6 @@ enum { + RPC_TASK_NEED_XMIT, + RPC_TASK_NEED_RECV, + RPC_TASK_MSG_PIN_WAIT, +- RPC_TASK_SIGNALLED, + }; + + #define rpc_test_and_set_running(t) \ +@@ -171,7 +170,7 @@ enum { + + #define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate) + +-#define RPC_SIGNALLED(t) test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate) ++#define RPC_SIGNALLED(t) (READ_ONCE(task->tk_rpc_status) == -ERESTARTSYS) + + /* + * Task priorities. +diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h +index 5e84952166895..5fe852bd31abc 100644 +--- a/include/trace/events/sunrpc.h ++++ b/include/trace/events/sunrpc.h +@@ -360,8 +360,7 @@ TRACE_EVENT(rpc_request, + { (1UL << RPC_TASK_ACTIVE), "ACTIVE" }, \ + { (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" }, \ + { (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" }, \ +- { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" }, \ +- { (1UL << RPC_TASK_SIGNALLED), "SIGNALLED" }) ++ { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" }) + + DECLARE_EVENT_CLASS(rpc_task_running, + +diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c +index cef623ea15060..9b45fbdc90cab 100644 +--- a/net/sunrpc/sched.c ++++ b/net/sunrpc/sched.c +@@ -864,8 +864,6 @@ void rpc_signal_task(struct rpc_task *task) + if (!rpc_task_set_rpc_status(task, -ERESTARTSYS)) + return; + trace_rpc_task_signalled(task, task->tk_action); +- set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); +- smp_mb__after_atomic(); + queue = READ_ONCE(task->tk_waitqueue); + if (queue) + rpc_wake_up_queued_task(queue, task); +-- +2.39.5 + diff --git a/queue-6.12/sunrpc-suppress-warnings-for-unused-procfs-functions.patch b/queue-6.12/sunrpc-suppress-warnings-for-unused-procfs-functions.patch new file mode 100644 index 0000000000..2bef610fc3 --- /dev/null +++ b/queue-6.12/sunrpc-suppress-warnings-for-unused-procfs-functions.patch @@ -0,0 +1,71 @@ +From ce375a811f5fb13aea090c60728220e44ac72a89 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 25 Feb 2025 15:52:21 +0100 +Subject: sunrpc: suppress warnings for unused procfs functions + +From: Arnd Bergmann + +[ Upstream commit 1f7a4f98c11fbeb18ed21f3b3a497e90a50ad2e0 ] + +There is a warning about unused variables when building with W=1 and no procfs: + +net/sunrpc/cache.c:1660:30: error: 'cache_flush_proc_ops' defined but not used [-Werror=unused-const-variable=] + 1660 | static const struct proc_ops cache_flush_proc_ops = { + | ^~~~~~~~~~~~~~~~~~~~ +net/sunrpc/cache.c:1622:30: error: 'content_proc_ops' defined but not used [-Werror=unused-const-variable=] + 1622 | static const struct proc_ops content_proc_ops = { + | ^~~~~~~~~~~~~~~~ +net/sunrpc/cache.c:1598:30: error: 'cache_channel_proc_ops' defined but not used [-Werror=unused-const-variable=] + 1598 | static const struct proc_ops cache_channel_proc_ops = { + | ^~~~~~~~~~~~~~~~~~~~~~ + +These are used inside of an #ifdef, so replacing that with an +IS_ENABLED() check lets the compiler see how they are used while +still dropping them 
during dead code elimination. + +Fixes: dbf847ecb631 ("knfsd: allow cache_register to return error on failure") +Reviewed-by: Jeff Layton +Acked-by: Chuck Lever +Signed-off-by: Arnd Bergmann +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + net/sunrpc/cache.c | 10 +++------- + 1 file changed, 3 insertions(+), 7 deletions(-) + +diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c +index 059f6ef1ad189..7fcb0574fc79e 100644 +--- a/net/sunrpc/cache.c ++++ b/net/sunrpc/cache.c +@@ -1669,12 +1669,14 @@ static void remove_cache_proc_entries(struct cache_detail *cd) + } + } + +-#ifdef CONFIG_PROC_FS + static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) + { + struct proc_dir_entry *p; + struct sunrpc_net *sn; + ++ if (!IS_ENABLED(CONFIG_PROC_FS)) ++ return 0; ++ + sn = net_generic(net, sunrpc_net_id); + cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc); + if (cd->procfs == NULL) +@@ -1702,12 +1704,6 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) + remove_cache_proc_entries(cd); + return -ENOMEM; + } +-#else /* CONFIG_PROC_FS */ +-static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) +-{ +- return 0; +-} +-#endif + + void __init cache_initialize(void) + { +-- +2.39.5 +
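As a closing illustration of the idiom in that last patch (a standalone
sketch, with a plain macro standing in for the kernel's IS_ENABLED()): an
early return on a compile-time constant keeps the disabled body visible to
the compiler, so the const objects it references no longer trip
-Wunused-const-variable, while dead-code elimination still drops the body
from the object file.

#include <stdio.h>

#define CONFIG_PROC_FS 0        /* flip to 1 to enable */

struct proc_ops { const char *name; };

/* still referenced below, so no unused-const-variable warning */
static const struct proc_ops cache_ops = { "cache" };

static int create_entries(void)
{
        if (!CONFIG_PROC_FS)
                return 0;       /* body below is still type-checked */

        printf("registering %s\n", cache_ops.name);
        return 1;
}

int main(void)
{
        return create_entries();
}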