[thirdparty/kernel/stable-queue.git] / queue-6.8 / rdma-cm-add-timeout-to-cm_destroy_id-wait.patch

From 71adc4b0ca414fddd7a83c47b370a48e1a3e7897 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sashal@kernel.org>
Date: Fri, 8 Mar 2024 22:33:23 -0800
Subject: RDMA/cm: add timeout to cm_destroy_id wait

From: Manjunath Patil <manjunath.b.patil@oracle.com>

[ Upstream commit 96d9cbe2f2ff7abde021bac75eafaceabe9a51fa ]

Add a timeout to the wait in cm_destroy_id, so that userspace can trigger
any data collection that would help in analyzing the cause of a delay in
destroying the cm_id.

The new noinline function gives dtrace/eBPF programs a symbol to hook.
Existing functionality is unchanged, except that this new probe-able
function is called (and logs an error) at every timeout interval.

We have seen cases where CM messages get stuck in the MAD layer (either
due to a software bug or a faulty HCA), leaving the cm_id stuck in the
following call stack. This patch helps in resolving such issues faster.

kernel: ... INFO: task XXXX:56778 blocked for more than 120 seconds.
...
	Call Trace:
	__schedule+0x2bc/0x895
	schedule+0x36/0x7c
	schedule_timeout+0x1f6/0x31f
 	? __slab_free+0x19c/0x2ba
	wait_for_completion+0x12b/0x18a
	? wake_up_q+0x80/0x73
	cm_destroy_id+0x345/0x610 [ib_cm]
	ib_destroy_cm_id+0x10/0x20 [ib_cm]
	rdma_destroy_id+0xa8/0x300 [rdma_cm]
	ucma_destroy_id+0x13e/0x190 [rdma_ucm]
	ucma_write+0xe0/0x160 [rdma_ucm]
	__vfs_write+0x3a/0x16d
	vfs_write+0xb2/0x1a1
	? syscall_trace_enter+0x1ce/0x2b8
	SyS_write+0x5c/0xd3
	do_syscall_64+0x79/0x1b9
	entry_SYSCALL_64_after_hwframe+0x16d/0x0

Signed-off-by: Manjunath Patil <manjunath.b.patil@oracle.com>
Link: https://lore.kernel.org/r/20240309063323.458102-1-manjunath.b.patil@oracle.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/infiniband/core/cm.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index ff58058aeadca..bf0df6ee4f785 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -34,6 +34,7 @@ MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("InfiniBand CM");
 MODULE_LICENSE("Dual BSD/GPL");
 
+#define CM_DESTROY_ID_WAIT_TIMEOUT 10000 /* msecs */
 static const char * const ibcm_rej_reason_strs[] = {
 	[IB_CM_REJ_NO_QP]			= "no QP",
 	[IB_CM_REJ_NO_EEC]			= "no EEC",
@@ -1025,10 +1026,20 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
 	}
 }
 
+static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id)
+{
+	struct cm_id_private *cm_id_priv;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	pr_err("%s: cm_id=%p timed out. state=%d refcnt=%d\n", __func__,
+	       cm_id, cm_id->state, refcount_read(&cm_id_priv->refcount));
+}
+
 static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
 {
 	struct cm_id_private *cm_id_priv;
 	struct cm_work *work;
+	int ret;
 
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
 	spin_lock_irq(&cm_id_priv->lock);
@@ -1135,7 +1146,14 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
 
 	xa_erase(&cm.local_id_table, cm_local_id(cm_id->local_id));
 	cm_deref_id(cm_id_priv);
-	wait_for_completion(&cm_id_priv->comp);
+	do {
+		ret = wait_for_completion_timeout(&cm_id_priv->comp,
+						  msecs_to_jiffies(
+						  CM_DESTROY_ID_WAIT_TIMEOUT));
+		if (!ret) /* timeout happened */
+			cm_destroy_id_wait_timeout(cm_id);
+	} while (!ret);
+
 	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
 		cm_free_work(work);
 
-- 
2.43.0
Commit	Line	Data
335f7cc0 SL	1	From 71adc4b0ca414fddd7a83c47b370a48e1a3e7897 Mon Sep 17 00:00:00 2001
	2	From: Sasha Levin <sashal@kernel.org>
	3	Date: Fri, 8 Mar 2024 22:33:23 -0800
	4	Subject: RDMA/cm: add timeout to cm_destroy_id wait
	5
	6	From: Manjunath Patil <manjunath.b.patil@oracle.com>
	7
	8	[ Upstream commit 96d9cbe2f2ff7abde021bac75eafaceabe9a51fa ]
	9
	10	Add a timeout to the wait in cm_destroy_id, so that userspace can trigger
	11	any data collection that would help in analyzing the cause of a delay in
	12	destroying the cm_id.
	13
	14	The new noinline function gives dtrace/eBPF programs a symbol to hook.
	15	Existing functionality is unchanged, except that this new probe-able
	16	function is called (and logs an error) at every timeout interval.
	17
	18	We have seen cases where CM messages get stuck in the MAD layer (either
	19	due to a software bug or a faulty HCA), leaving the cm_id stuck in the
	20	following call stack. This patch helps in resolving such issues faster.
	21
	22	kernel: ... INFO: task XXXX:56778 blocked for more than 120 seconds.
	23	...
	24	Call Trace:
	25	__schedule+0x2bc/0x895
	26	schedule+0x36/0x7c
	27	schedule_timeout+0x1f6/0x31f
	28	? __slab_free+0x19c/0x2ba
	29	wait_for_completion+0x12b/0x18a
	30	? wake_up_q+0x80/0x73
	31	cm_destroy_id+0x345/0x610 [ib_cm]
	32	ib_destroy_cm_id+0x10/0x20 [ib_cm]
	33	rdma_destroy_id+0xa8/0x300 [rdma_cm]
	34	ucma_destroy_id+0x13e/0x190 [rdma_ucm]
	35	ucma_write+0xe0/0x160 [rdma_ucm]
	36	__vfs_write+0x3a/0x16d
	37	vfs_write+0xb2/0x1a1
	38	? syscall_trace_enter+0x1ce/0x2b8
	39	SyS_write+0x5c/0xd3
	40	do_syscall_64+0x79/0x1b9
	41	entry_SYSCALL_64_after_hwframe+0x16d/0x0
	42
	43	Signed-off-by: Manjunath Patil <manjunath.b.patil@oracle.com>
	44	Link: https://lore.kernel.org/r/20240309063323.458102-1-manjunath.b.patil@oracle.com
	45	Signed-off-by: Leon Romanovsky <leon@kernel.org>
	46	Signed-off-by: Sasha Levin <sashal@kernel.org>
	47	---
	48	drivers/infiniband/core/cm.c | 20 +++++++++++++++++++-
	49	1 file changed, 19 insertions(+), 1 deletion(-)
	50
	51	diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
	52	index ff58058aeadca..bf0df6ee4f785 100644
	53	--- a/drivers/infiniband/core/cm.c
	54	+++ b/drivers/infiniband/core/cm.c
	55	@@ -34,6 +34,7 @@ MODULE_AUTHOR("Sean Hefty");
	56	MODULE_DESCRIPTION("InfiniBand CM");
	57	MODULE_LICENSE("Dual BSD/GPL");
	58
	59	+#define CM_DESTROY_ID_WAIT_TIMEOUT 10000 /* msecs */
	60	static const char * const ibcm_rej_reason_strs[] = {
	61	[IB_CM_REJ_NO_QP] = "no QP",
	62	[IB_CM_REJ_NO_EEC] = "no EEC",
	63	@@ -1025,10 +1026,20 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
	64	}
	65	}
66
67	+static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id)
68	+{
69	+ struct cm_id_private *cm_id_priv;
70	+
71	+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
72	+ pr_err("%s: cm_id=%p timed out. state=%d refcnt=%d\n", __func__,
73	+ cm_id, cm_id->state, refcount_read(&cm_id_priv->refcount));
74	+}
75	+
76	static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
77	{
78	struct cm_id_private *cm_id_priv;
79	struct cm_work *work;
80	+ int ret;
81
82	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
83	spin_lock_irq(&cm_id_priv->lock);
84	@@ -1135,7 +1146,14 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
85
86	xa_erase(&cm.local_id_table, cm_local_id(cm_id->local_id));
87	cm_deref_id(cm_id_priv);
88	- wait_for_completion(&cm_id_priv->comp);
89	+ do {
90	+ ret = wait_for_completion_timeout(&cm_id_priv->comp,
91	+ msecs_to_jiffies(
92	+ CM_DESTROY_ID_WAIT_TIMEOUT));
93	+ if (!ret) /* timeout happened */
94	+ cm_destroy_id_wait_timeout(cm_id);
95	+ } while (!ret);
96	+
97	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
98	cm_free_work(work);
99
100	--
101	2.43.0
102