--- /dev/null
+From 0aadafb5c34403a7cced1a8d61877048dc059f70 Mon Sep 17 00:00:00 2001
+From: Mike Christie <michael.christie@oracle.com>
+Date: Thu, 7 Apr 2022 19:13:08 -0500
+Subject: scsi: iscsi: Fix endpoint reuse regression
+
+From: Mike Christie <michael.christie@oracle.com>
+
+commit 0aadafb5c34403a7cced1a8d61877048dc059f70 upstream.
+
+This patch fixes a bug where when using iSCSI offload we can free an
+endpoint while userspace still thinks it's active. That then causes the
+endpoint ID to be reused for a new connection's endpoint while userspace
+still thinks the ID is for the original connection. Userspace will then end
+up disconnecting a running connection's endpoint or trying to bind to
+another connection's endpoint.
+
+This bug is a regression added in:
+
+Commit 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling")
+
+where we added a in kernel ep_disconnect call to fix a bug in:
+
+Commit 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in
+kernel space")
+
+where we would call stop_conn without having done ep_disconnect. This early
+ep_disconnect call will then free the endpoint and it's ID while userspace
+still thinks the ID is valid.
+
+Fix the early release of the ID by having the in kernel recovery code keep
+a reference to the endpoint until userspace has called into the kernel to
+finish cleaning up the endpoint/connection. It requires the previous commit
+"scsi: iscsi: Release endpoint ID when its freed" which moved the freeing
+of the ID until when the endpoint is released.
+
+Link: https://lore.kernel.org/r/20220408001314.5014-5-michael.christie@oracle.com
+Fixes: 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling")
+Tested-by: Manish Rangankar <mrangankar@marvell.com>
+Reviewed-by: Lee Duncan <lduncan@suse.com>
+Reviewed-by: Chris Leech <cleech@redhat.com>
+Signed-off-by: Mike Christie <michael.christie@oracle.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/scsi_transport_iscsi.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/drivers/scsi/scsi_transport_iscsi.c
++++ b/drivers/scsi/scsi_transport_iscsi.c
+@@ -2273,7 +2273,11 @@ static void iscsi_if_disconnect_bound_ep
+ mutex_unlock(&conn->ep_mutex);
+
+ flush_work(&conn->cleanup_work);
+-
++ /*
++ * Userspace is now done with the EP so we can release the ref
++ * iscsi_cleanup_conn_work_fn took.
++ */
++ iscsi_put_endpoint(ep);
+ mutex_lock(&conn->ep_mutex);
+ }
+ }
+@@ -2355,6 +2359,12 @@ static void iscsi_cleanup_conn_work_fn(s
+ return;
+ }
+
++ /*
++ * Get a ref to the ep, so we don't release its ID until after
++ * userspace is done referencing it in iscsi_if_disconnect_bound_ep.
++ */
++ if (conn->ep)
++ get_device(&conn->ep->dev);
+ iscsi_ep_disconnect(conn, false);
+
+ if (system_state != SYSTEM_RUNNING) {
--- /dev/null
+From 03690d81974535f228e892a14f0d2d44404fe555 Mon Sep 17 00:00:00 2001
+From: Mike Christie <michael.christie@oracle.com>
+Date: Thu, 7 Apr 2022 19:13:10 -0500
+Subject: scsi: iscsi: Fix unbound endpoint error handling
+
+From: Mike Christie <michael.christie@oracle.com>
+
+commit 03690d81974535f228e892a14f0d2d44404fe555 upstream.
+
+If a driver raises a connection error before the connection is bound, we
+can leave a cleanup_work queued that can later run and disconnect/stop a
+connection that is logged in. The problem is that drivers can call
+iscsi_conn_error_event for endpoints that are connected but not yet bound
+when something like the network port they are using is brought down.
+iscsi_cleanup_conn_work_fn will check for this and exit early, but if the
+cleanup_work is stuck behind other works, it might not get run until after
+userspace has done ep_disconnect. Because the endpoint is not yet bound
+there was no way for ep_disconnect to flush the work.
+
+The bug of leaving stop_conns queued was added in:
+
+Commit 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling")
+
+and:
+
+Commit 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in
+kernel space")
+
+was supposed to fix it, but left this case.
+
+This patch moves the conn state check to before we even queue the work so
+we can avoid queueing.
+
+Link: https://lore.kernel.org/r/20220408001314.5014-7-michael.christie@oracle.com
+Fixes: 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in kernel space")
+Tested-by: Manish Rangankar <mrangankar@marvell.com>
+Reviewed-by: Lee Duncan <lduncan@@suse.com>
+Reviewed-by: Chris Leech <cleech@redhat.com>
+Signed-off-by: Mike Christie <michael.christie@oracle.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/scsi_transport_iscsi.c | 65 +++++++++++++++++++-----------------
+ 1 file changed, 36 insertions(+), 29 deletions(-)
+
+--- a/drivers/scsi/scsi_transport_iscsi.c
++++ b/drivers/scsi/scsi_transport_iscsi.c
+@@ -2224,10 +2224,10 @@ static void iscsi_stop_conn(struct iscsi
+
+ switch (flag) {
+ case STOP_CONN_RECOVER:
+- conn->state = ISCSI_CONN_FAILED;
++ WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);
+ break;
+ case STOP_CONN_TERM:
+- conn->state = ISCSI_CONN_DOWN;
++ WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);
+ break;
+ default:
+ iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n",
+@@ -2245,7 +2245,7 @@ static void iscsi_ep_disconnect(struct i
+ struct iscsi_endpoint *ep;
+
+ ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n");
+- conn->state = ISCSI_CONN_FAILED;
++ WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);
+
+ if (!conn->ep || !session->transport->ep_disconnect)
+ return;
+@@ -2345,21 +2345,6 @@ static void iscsi_cleanup_conn_work_fn(s
+
+ mutex_lock(&conn->ep_mutex);
+ /*
+- * If we are not at least bound there is nothing for us to do. Userspace
+- * will do a ep_disconnect call if offload is used, but will not be
+- * doing a stop since there is nothing to clean up, so we have to clear
+- * the cleanup bit here.
+- */
+- if (conn->state != ISCSI_CONN_BOUND && conn->state != ISCSI_CONN_UP) {
+- ISCSI_DBG_TRANS_CONN(conn, "Got error while conn is already failed. Ignoring.\n");
+- spin_lock_irq(&conn->lock);
+- clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags);
+- spin_unlock_irq(&conn->lock);
+- mutex_unlock(&conn->ep_mutex);
+- return;
+- }
+-
+- /*
+ * Get a ref to the ep, so we don't release its ID until after
+ * userspace is done referencing it in iscsi_if_disconnect_bound_ep.
+ */
+@@ -2425,7 +2410,7 @@ iscsi_create_conn(struct iscsi_cls_sessi
+ INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn);
+ conn->transport = transport;
+ conn->cid = cid;
+- conn->state = ISCSI_CONN_DOWN;
++ WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);
+
+ /* this is released in the dev's release function */
+ if (!get_device(&session->dev))
+@@ -2613,10 +2598,30 @@ void iscsi_conn_error_event(struct iscsi
+ struct iscsi_internal *priv;
+ int len = nlmsg_total_size(sizeof(*ev));
+ unsigned long flags;
++ int state;
+
+ spin_lock_irqsave(&conn->lock, flags);
+- if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags))
+- queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work);
++ /*
++ * Userspace will only do a stop call if we are at least bound. And, we
++ * only need to do the in kernel cleanup if in the UP state so cmds can
++ * be released to upper layers. If in other states just wait for
++ * userspace to avoid races that can leave the cleanup_work queued.
++ */
++ state = READ_ONCE(conn->state);
++ switch (state) {
++ case ISCSI_CONN_BOUND:
++ case ISCSI_CONN_UP:
++ if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP,
++ &conn->flags)) {
++ queue_work(iscsi_conn_cleanup_workq,
++ &conn->cleanup_work);
++ }
++ break;
++ default:
++ ISCSI_DBG_TRANS_CONN(conn, "Got conn error in state %d\n",
++ state);
++ break;
++ }
+ spin_unlock_irqrestore(&conn->lock, flags);
+
+ priv = iscsi_if_transport_lookup(conn->transport);
+@@ -2967,7 +2972,7 @@ iscsi_set_param(struct iscsi_transport *
+ char *data = (char*)ev + sizeof(*ev);
+ struct iscsi_cls_conn *conn;
+ struct iscsi_cls_session *session;
+- int err = 0, value = 0;
++ int err = 0, value = 0, state;
+
+ if (ev->u.set_param.len > PAGE_SIZE)
+ return -EINVAL;
+@@ -2984,8 +2989,8 @@ iscsi_set_param(struct iscsi_transport *
+ session->recovery_tmo = value;
+ break;
+ default:
+- if ((conn->state == ISCSI_CONN_BOUND) ||
+- (conn->state == ISCSI_CONN_UP)) {
++ state = READ_ONCE(conn->state);
++ if (state == ISCSI_CONN_BOUND || state == ISCSI_CONN_UP) {
+ err = transport->set_param(conn, ev->u.set_param.param,
+ data, ev->u.set_param.len);
+ } else {
+@@ -3781,7 +3786,7 @@ static int iscsi_if_transport_conn(struc
+ ev->u.b_conn.transport_eph,
+ ev->u.b_conn.is_leading);
+ if (!ev->r.retcode)
+- conn->state = ISCSI_CONN_BOUND;
++ WRITE_ONCE(conn->state, ISCSI_CONN_BOUND);
+
+ if (ev->r.retcode || !transport->ep_connect)
+ break;
+@@ -3800,7 +3805,8 @@ static int iscsi_if_transport_conn(struc
+ case ISCSI_UEVENT_START_CONN:
+ ev->r.retcode = transport->start_conn(conn);
+ if (!ev->r.retcode)
+- conn->state = ISCSI_CONN_UP;
++ WRITE_ONCE(conn->state, ISCSI_CONN_UP);
++
+ break;
+ case ISCSI_UEVENT_SEND_PDU:
+ pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev);
+@@ -4108,10 +4114,11 @@ static ssize_t show_conn_state(struct de
+ {
+ struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent);
+ const char *state = "unknown";
++ int conn_state = READ_ONCE(conn->state);
+
+- if (conn->state >= 0 &&
+- conn->state < ARRAY_SIZE(connection_state_names))
+- state = connection_state_names[conn->state];
++ if (conn_state >= 0 &&
++ conn_state < ARRAY_SIZE(connection_state_names))
++ state = connection_state_names[conn_state];
+
+ return sysfs_emit(buf, "%s\n", state);
+ }