From cd7093e502b5866eda2d2d2c7f97df17a7acfac5 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 18 Apr 2022 12:19:03 +0200
Subject: [PATCH] 5.10-stable patches

added patches:
	scsi-iscsi-fix-endpoint-reuse-regression.patch
	scsi-iscsi-fix-unbound-endpoint-error-handling.patch
---
 ...-iscsi-fix-endpoint-reuse-regression.patch |  75 +++++++
 ...-fix-unbound-endpoint-error-handling.patch | 187 ++++++++++++++++++
 queue-5.10/series                             |   2 +
 3 files changed, 264 insertions(+)
 create mode 100644 queue-5.10/scsi-iscsi-fix-endpoint-reuse-regression.patch
 create mode 100644 queue-5.10/scsi-iscsi-fix-unbound-endpoint-error-handling.patch

diff --git a/queue-5.10/scsi-iscsi-fix-endpoint-reuse-regression.patch b/queue-5.10/scsi-iscsi-fix-endpoint-reuse-regression.patch
new file mode 100644
index 00000000000..bf5d4273b8d
--- /dev/null
+++ b/queue-5.10/scsi-iscsi-fix-endpoint-reuse-regression.patch
@@ -0,0 +1,75 @@
+From 0aadafb5c34403a7cced1a8d61877048dc059f70 Mon Sep 17 00:00:00 2001
+From: Mike Christie <michael.christie@oracle.com>
+Date: Thu, 7 Apr 2022 19:13:08 -0500
+Subject: scsi: iscsi: Fix endpoint reuse regression
+
+From: Mike Christie <michael.christie@oracle.com>
+
+commit 0aadafb5c34403a7cced1a8d61877048dc059f70 upstream.
+
+This patch fixes a bug where when using iSCSI offload we can free an
+endpoint while userspace still thinks it's active. That then causes the
+endpoint ID to be reused for a new connection's endpoint while userspace
+still thinks the ID is for the original connection. Userspace will then end
+up disconnecting a running connection's endpoint or trying to bind to
+another connection's endpoint.
+
+This bug is a regression added in:
+
+Commit 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling")
+
+where we added a in kernel ep_disconnect call to fix a bug in:
+
+Commit 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in
+kernel space")
+
+where we would call stop_conn without having done ep_disconnect. This early
+ep_disconnect call will then free the endpoint and it's ID while userspace
+still thinks the ID is valid.
+
+Fix the early release of the ID by having the in kernel recovery code keep
+a reference to the endpoint until userspace has called into the kernel to
+finish cleaning up the endpoint/connection. It requires the previous commit
+"scsi: iscsi: Release endpoint ID when its freed" which moved the freeing
+of the ID until when the endpoint is released.
+
+Link: https://lore.kernel.org/r/20220408001314.5014-5-michael.christie@oracle.com
+Fixes: 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling")
+Tested-by: Manish Rangankar <mrangankar@marvell.com>
+Reviewed-by: Lee Duncan <lduncan@suse.com>
+Reviewed-by: Chris Leech <cleech@redhat.com>
+Signed-off-by: Mike Christie <michael.christie@oracle.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/scsi_transport_iscsi.c |   12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/drivers/scsi/scsi_transport_iscsi.c
++++ b/drivers/scsi/scsi_transport_iscsi.c
+@@ -2273,7 +2273,11 @@ static void iscsi_if_disconnect_bound_ep
+ 		mutex_unlock(&conn->ep_mutex);
+ 
+ 		flush_work(&conn->cleanup_work);
+-
++		/*
++		 * Userspace is now done with the EP so we can release the ref
++		 * iscsi_cleanup_conn_work_fn took.
++		 */
++		iscsi_put_endpoint(ep);
+ 		mutex_lock(&conn->ep_mutex);
+ 	}
+ }
+@@ -2355,6 +2359,12 @@ static void iscsi_cleanup_conn_work_fn(s
+ 		return;
+ 	}
+ 
++	/*
++	 * Get a ref to the ep, so we don't release its ID until after
++	 * userspace is done referencing it in iscsi_if_disconnect_bound_ep.
++	 */
++	if (conn->ep)
++		get_device(&conn->ep->dev);
+ 	iscsi_ep_disconnect(conn, false);
+ 
+ 	if (system_state != SYSTEM_RUNNING) {
diff --git a/queue-5.10/scsi-iscsi-fix-unbound-endpoint-error-handling.patch b/queue-5.10/scsi-iscsi-fix-unbound-endpoint-error-handling.patch
new file mode 100644
index 00000000000..9d2de8b9410
--- /dev/null
+++ b/queue-5.10/scsi-iscsi-fix-unbound-endpoint-error-handling.patch
@@ -0,0 +1,187 @@
+From 03690d81974535f228e892a14f0d2d44404fe555 Mon Sep 17 00:00:00 2001
+From: Mike Christie <michael.christie@oracle.com>
+Date: Thu, 7 Apr 2022 19:13:10 -0500
+Subject: scsi: iscsi: Fix unbound endpoint error handling
+
+From: Mike Christie <michael.christie@oracle.com>
+
+commit 03690d81974535f228e892a14f0d2d44404fe555 upstream.
+
+If a driver raises a connection error before the connection is bound, we
+can leave a cleanup_work queued that can later run and disconnect/stop a
+connection that is logged in. The problem is that drivers can call
+iscsi_conn_error_event for endpoints that are connected but not yet bound
+when something like the network port they are using is brought down.
+iscsi_cleanup_conn_work_fn will check for this and exit early, but if the
+cleanup_work is stuck behind other works, it might not get run until after
+userspace has done ep_disconnect. Because the endpoint is not yet bound
+there was no way for ep_disconnect to flush the work.
+
+The bug of leaving stop_conns queued was added in:
+
+Commit 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling")
+
+and:
+
+Commit 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in
+kernel space")
+
+was supposed to fix it, but left this case.
+
+This patch moves the conn state check to before we even queue the work so
+we can avoid queueing.
+
+Link: https://lore.kernel.org/r/20220408001314.5014-7-michael.christie@oracle.com
+Fixes: 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in kernel space")
+Tested-by: Manish Rangankar <mrangankar@marvell.com>
+Reviewed-by: Lee Duncan <lduncan@@suse.com>
+Reviewed-by: Chris Leech <cleech@redhat.com>
+Signed-off-by: Mike Christie <michael.christie@oracle.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/scsi_transport_iscsi.c |   65 +++++++++++++++++++-----------------
+ 1 file changed, 36 insertions(+), 29 deletions(-)
+
+--- a/drivers/scsi/scsi_transport_iscsi.c
++++ b/drivers/scsi/scsi_transport_iscsi.c
+@@ -2224,10 +2224,10 @@ static void iscsi_stop_conn(struct iscsi
+ 
+ 	switch (flag) {
+ 	case STOP_CONN_RECOVER:
+-		conn->state = ISCSI_CONN_FAILED;
++		WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);
+ 		break;
+ 	case STOP_CONN_TERM:
+-		conn->state = ISCSI_CONN_DOWN;
++		WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);
+ 		break;
+ 	default:
+ 		iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n",
+@@ -2245,7 +2245,7 @@ static void iscsi_ep_disconnect(struct i
+ 	struct iscsi_endpoint *ep;
+ 
+ 	ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n");
+-	conn->state = ISCSI_CONN_FAILED;
++	WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);
+ 
+ 	if (!conn->ep || !session->transport->ep_disconnect)
+ 		return;
+@@ -2345,21 +2345,6 @@ static void iscsi_cleanup_conn_work_fn(s
+ 
+ 	mutex_lock(&conn->ep_mutex);
+ 	/*
+-	 * If we are not at least bound there is nothing for us to do. Userspace
+-	 * will do a ep_disconnect call if offload is used, but will not be
+-	 * doing a stop since there is nothing to clean up, so we have to clear
+-	 * the cleanup bit here.
+-	 */
+-	if (conn->state != ISCSI_CONN_BOUND && conn->state != ISCSI_CONN_UP) {
+-		ISCSI_DBG_TRANS_CONN(conn, "Got error while conn is already failed. Ignoring.\n");
+-		spin_lock_irq(&conn->lock);
+-		clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags);
+-		spin_unlock_irq(&conn->lock);
+-		mutex_unlock(&conn->ep_mutex);
+-		return;
+-	}
+-
+-	/*
+ 	 * Get a ref to the ep, so we don't release its ID until after
+ 	 * userspace is done referencing it in iscsi_if_disconnect_bound_ep.
+ 	 */
+@@ -2425,7 +2410,7 @@ iscsi_create_conn(struct iscsi_cls_sessi
+ 	INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn);
+ 	conn->transport = transport;
+ 	conn->cid = cid;
+-	conn->state = ISCSI_CONN_DOWN;
++	WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);
+ 
+ 	/* this is released in the dev's release function */
+ 	if (!get_device(&session->dev))
+@@ -2613,10 +2598,30 @@ void iscsi_conn_error_event(struct iscsi
+ 	struct iscsi_internal *priv;
+ 	int len = nlmsg_total_size(sizeof(*ev));
+ 	unsigned long flags;
++	int state;
+ 
+ 	spin_lock_irqsave(&conn->lock, flags);
+-	if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags))
+-		queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work);
++	/*
++	 * Userspace will only do a stop call if we are at least bound. And, we
++	 * only need to do the in kernel cleanup if in the UP state so cmds can
++	 * be released to upper layers. If in other states just wait for
++	 * userspace to avoid races that can leave the cleanup_work queued.
++	 */
++	state = READ_ONCE(conn->state);
++	switch (state) {
++	case ISCSI_CONN_BOUND:
++	case ISCSI_CONN_UP:
++		if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP,
++				      &conn->flags)) {
++			queue_work(iscsi_conn_cleanup_workq,
++				   &conn->cleanup_work);
++		}
++		break;
++	default:
++		ISCSI_DBG_TRANS_CONN(conn, "Got conn error in state %d\n",
++				     state);
++		break;
++	}
+ 	spin_unlock_irqrestore(&conn->lock, flags);
+ 
+ 	priv = iscsi_if_transport_lookup(conn->transport);
+@@ -2967,7 +2972,7 @@ iscsi_set_param(struct iscsi_transport *
+ 	char *data = (char*)ev + sizeof(*ev);
+ 	struct iscsi_cls_conn *conn;
+ 	struct iscsi_cls_session *session;
+-	int err = 0, value = 0;
++	int err = 0, value = 0, state;
+ 
+ 	if (ev->u.set_param.len > PAGE_SIZE)
+ 		return -EINVAL;
+@@ -2984,8 +2989,8 @@ iscsi_set_param(struct iscsi_transport *
+ 			session->recovery_tmo = value;
+ 		break;
+ 	default:
+-		if ((conn->state == ISCSI_CONN_BOUND) ||
+-			(conn->state == ISCSI_CONN_UP)) {
++		state = READ_ONCE(conn->state);
++		if (state == ISCSI_CONN_BOUND || state == ISCSI_CONN_UP) {
+ 			err = transport->set_param(conn, ev->u.set_param.param,
+ 					data, ev->u.set_param.len);
+ 		} else {
+@@ -3781,7 +3786,7 @@ static int iscsi_if_transport_conn(struc
+ 						ev->u.b_conn.transport_eph,
+ 						ev->u.b_conn.is_leading);
+ 		if (!ev->r.retcode)
+-			conn->state = ISCSI_CONN_BOUND;
++			WRITE_ONCE(conn->state, ISCSI_CONN_BOUND);
+ 
+ 		if (ev->r.retcode || !transport->ep_connect)
+ 			break;
+@@ -3800,7 +3805,8 @@ static int iscsi_if_transport_conn(struc
+ 	case ISCSI_UEVENT_START_CONN:
+ 		ev->r.retcode = transport->start_conn(conn);
+ 		if (!ev->r.retcode)
+-			conn->state = ISCSI_CONN_UP;
++			WRITE_ONCE(conn->state, ISCSI_CONN_UP);
++
+ 		break;
+ 	case ISCSI_UEVENT_SEND_PDU:
+ 		pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev);
+@@ -4108,10 +4114,11 @@ static ssize_t show_conn_state(struct de
+ {
+ 	struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent);
+ 	const char *state = "unknown";
++	int conn_state = READ_ONCE(conn->state);
+ 
+-	if (conn->state >= 0 &&
+-	    conn->state < ARRAY_SIZE(connection_state_names))
+-		state = connection_state_names[conn->state];
++	if (conn_state >= 0 &&
++	    conn_state < ARRAY_SIZE(connection_state_names))
++		state = connection_state_names[conn_state];
+ 
+ 	return sysfs_emit(buf, "%s\n", state);
+ }
diff --git a/queue-5.10/series b/queue-5.10/series
index 9c93c5e9ecb..8ab1895a0a0 100644
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -93,3 +93,5 @@ smp-fix-offline-cpu-check-in-flush_smp_call_function_queue.patch
 i2c-pasemi-wait-for-write-xfers-to-finish.patch
 timers-fix-warning-condition-in-__run_timers.patch
 dma-direct-avoid-redundant-memory-sync-for-swiotlb.patch
+scsi-iscsi-fix-endpoint-reuse-regression.patch
+scsi-iscsi-fix-unbound-endpoint-error-handling.patch
-- 
2.47.3