5.10-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 18 Apr 2022 10:19:03 +0000 (12:19 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 18 Apr 2022 10:19:03 +0000 (12:19 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 18 Apr 2022 10:19:03 +0000 (12:19 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 18 Apr 2022 10:19:03 +0000 (12:19 +0200)
diff --git a/queue-5.10/scsi-iscsi-fix-endpoint-reuse-regression.patch b/queue-5.10/scsi-iscsi-fix-endpoint-reuse-regression.patch

new file mode 100644 (file)

index 0000000..bf5d427
--- /dev/null
+++ b/queue-5.10/scsi-iscsi-fix-endpoint-reuse-regression.patch
@@ -0,0 +1,75 @@
+From 0aadafb5c34403a7cced1a8d61877048dc059f70 Mon Sep 17 00:00:00 2001
+From: Mike Christie <michael.christie@oracle.com>
+Date: Thu, 7 Apr 2022 19:13:08 -0500
+Subject: scsi: iscsi: Fix endpoint reuse regression
+
+From: Mike Christie <michael.christie@oracle.com>
+
+commit 0aadafb5c34403a7cced1a8d61877048dc059f70 upstream.
+
+This patch fixes a bug where when using iSCSI offload we can free an
+endpoint while userspace still thinks it's active. That then causes the
+endpoint ID to be reused for a new connection's endpoint while userspace
+still thinks the ID is for the original connection. Userspace will then end
+up disconnecting a running connection's endpoint or trying to bind to
+another connection's endpoint.
+
+This bug is a regression added in:
+
+Commit 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling")
+
+where we added a in kernel ep_disconnect call to fix a bug in:
+
+Commit 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in
+kernel space")
+
+where we would call stop_conn without having done ep_disconnect. This early
+ep_disconnect call will then free the endpoint and it's ID while userspace
+still thinks the ID is valid.
+
+Fix the early release of the ID by having the in kernel recovery code keep
+a reference to the endpoint until userspace has called into the kernel to
+finish cleaning up the endpoint/connection. It requires the previous commit
+"scsi: iscsi: Release endpoint ID when its freed" which moved the freeing
+of the ID until when the endpoint is released.
+
+Link: https://lore.kernel.org/r/20220408001314.5014-5-michael.christie@oracle.com
+Fixes: 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling")
+Tested-by: Manish Rangankar <mrangankar@marvell.com>
+Reviewed-by: Lee Duncan <lduncan@suse.com>
+Reviewed-by: Chris Leech <cleech@redhat.com>
+Signed-off-by: Mike Christie <michael.christie@oracle.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/scsi_transport_iscsi.c |   12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/drivers/scsi/scsi_transport_iscsi.c
++++ b/drivers/scsi/scsi_transport_iscsi.c
+@@ -2273,7 +2273,11 @@ static void iscsi_if_disconnect_bound_ep
+               mutex_unlock(&conn->ep_mutex);
+ 
+               flush_work(&conn->cleanup_work);
+-
++              /*
++               * Userspace is now done with the EP so we can release the ref
++               * iscsi_cleanup_conn_work_fn took.
++               */
++              iscsi_put_endpoint(ep);
+               mutex_lock(&conn->ep_mutex);
+       }
+ }
+@@ -2355,6 +2359,12 @@ static void iscsi_cleanup_conn_work_fn(s
+               return;
+       }
+ 
++      /*
++       * Get a ref to the ep, so we don't release its ID until after
++       * userspace is done referencing it in iscsi_if_disconnect_bound_ep.
++       */
++      if (conn->ep)
++              get_device(&conn->ep->dev);
+       iscsi_ep_disconnect(conn, false);
+ 
+       if (system_state != SYSTEM_RUNNING) {
diff --git a/queue-5.10/scsi-iscsi-fix-unbound-endpoint-error-handling.patch b/queue-5.10/scsi-iscsi-fix-unbound-endpoint-error-handling.patch

new file mode 100644 (file)

index 0000000..9d2de8b
--- /dev/null
+++ b/queue-5.10/scsi-iscsi-fix-unbound-endpoint-error-handling.patch
@@ -0,0 +1,187 @@
+From 03690d81974535f228e892a14f0d2d44404fe555 Mon Sep 17 00:00:00 2001
+From: Mike Christie <michael.christie@oracle.com>
+Date: Thu, 7 Apr 2022 19:13:10 -0500
+Subject: scsi: iscsi: Fix unbound endpoint error handling
+
+From: Mike Christie <michael.christie@oracle.com>
+
+commit 03690d81974535f228e892a14f0d2d44404fe555 upstream.
+
+If a driver raises a connection error before the connection is bound, we
+can leave a cleanup_work queued that can later run and disconnect/stop a
+connection that is logged in. The problem is that drivers can call
+iscsi_conn_error_event for endpoints that are connected but not yet bound
+when something like the network port they are using is brought down.
+iscsi_cleanup_conn_work_fn will check for this and exit early, but if the
+cleanup_work is stuck behind other works, it might not get run until after
+userspace has done ep_disconnect. Because the endpoint is not yet bound
+there was no way for ep_disconnect to flush the work.
+
+The bug of leaving stop_conns queued was added in:
+
+Commit 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling")
+
+and:
+
+Commit 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in
+kernel space")
+
+was supposed to fix it, but left this case.
+
+This patch moves the conn state check to before we even queue the work so
+we can avoid queueing.
+
+Link: https://lore.kernel.org/r/20220408001314.5014-7-michael.christie@oracle.com
+Fixes: 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in kernel space")
+Tested-by: Manish Rangankar <mrangankar@marvell.com>
+Reviewed-by: Lee Duncan <lduncan@@suse.com>
+Reviewed-by: Chris Leech <cleech@redhat.com>
+Signed-off-by: Mike Christie <michael.christie@oracle.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/scsi_transport_iscsi.c |   65 +++++++++++++++++++-----------------
+ 1 file changed, 36 insertions(+), 29 deletions(-)
+
+--- a/drivers/scsi/scsi_transport_iscsi.c
++++ b/drivers/scsi/scsi_transport_iscsi.c
+@@ -2224,10 +2224,10 @@ static void iscsi_stop_conn(struct iscsi
+ 
+       switch (flag) {
+       case STOP_CONN_RECOVER:
+-              conn->state = ISCSI_CONN_FAILED;
++              WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);
+               break;
+       case STOP_CONN_TERM:
+-              conn->state = ISCSI_CONN_DOWN;
++              WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);
+               break;
+       default:
+               iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n",
+@@ -2245,7 +2245,7 @@ static void iscsi_ep_disconnect(struct i
+       struct iscsi_endpoint *ep;
+ 
+       ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n");
+-      conn->state = ISCSI_CONN_FAILED;
++      WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);
+ 
+       if (!conn->ep || !session->transport->ep_disconnect)
+               return;
+@@ -2345,21 +2345,6 @@ static void iscsi_cleanup_conn_work_fn(s
+ 
+       mutex_lock(&conn->ep_mutex);
+       /*
+-       * If we are not at least bound there is nothing for us to do. Userspace
+-       * will do a ep_disconnect call if offload is used, but will not be
+-       * doing a stop since there is nothing to clean up, so we have to clear
+-       * the cleanup bit here.
+-       */
+-      if (conn->state != ISCSI_CONN_BOUND && conn->state != ISCSI_CONN_UP) {
+-              ISCSI_DBG_TRANS_CONN(conn, "Got error while conn is already failed. Ignoring.\n");
+-              spin_lock_irq(&conn->lock);
+-              clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags);
+-              spin_unlock_irq(&conn->lock);
+-              mutex_unlock(&conn->ep_mutex);
+-              return;
+-      }
+-
+-      /*
+        * Get a ref to the ep, so we don't release its ID until after
+        * userspace is done referencing it in iscsi_if_disconnect_bound_ep.
+        */
+@@ -2425,7 +2410,7 @@ iscsi_create_conn(struct iscsi_cls_sessi
+       INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn);
+       conn->transport = transport;
+       conn->cid = cid;
+-      conn->state = ISCSI_CONN_DOWN;
++      WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);
+ 
+       /* this is released in the dev's release function */
+       if (!get_device(&session->dev))
+@@ -2613,10 +2598,30 @@ void iscsi_conn_error_event(struct iscsi
+       struct iscsi_internal *priv;
+       int len = nlmsg_total_size(sizeof(*ev));
+       unsigned long flags;
++      int state;
+ 
+       spin_lock_irqsave(&conn->lock, flags);
+-      if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags))
+-              queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work);
++      /*
++       * Userspace will only do a stop call if we are at least bound. And, we
++       * only need to do the in kernel cleanup if in the UP state so cmds can
++       * be released to upper layers. If in other states just wait for
++       * userspace to avoid races that can leave the cleanup_work queued.
++       */
++      state = READ_ONCE(conn->state);
++      switch (state) {
++      case ISCSI_CONN_BOUND:
++      case ISCSI_CONN_UP:
++              if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP,
++                                    &conn->flags)) {
++                      queue_work(iscsi_conn_cleanup_workq,
++                                 &conn->cleanup_work);
++              }
++              break;
++      default:
++              ISCSI_DBG_TRANS_CONN(conn, "Got conn error in state %d\n",
++                                   state);
++              break;
++      }
+       spin_unlock_irqrestore(&conn->lock, flags);
+ 
+       priv = iscsi_if_transport_lookup(conn->transport);
+@@ -2967,7 +2972,7 @@ iscsi_set_param(struct iscsi_transport *
+       char *data = (char*)ev + sizeof(*ev);
+       struct iscsi_cls_conn *conn;
+       struct iscsi_cls_session *session;
+-      int err = 0, value = 0;
++      int err = 0, value = 0, state;
+ 
+       if (ev->u.set_param.len > PAGE_SIZE)
+               return -EINVAL;
+@@ -2984,8 +2989,8 @@ iscsi_set_param(struct iscsi_transport *
+                       session->recovery_tmo = value;
+               break;
+       default:
+-              if ((conn->state == ISCSI_CONN_BOUND) ||
+-                      (conn->state == ISCSI_CONN_UP)) {
++              state = READ_ONCE(conn->state);
++              if (state == ISCSI_CONN_BOUND || state == ISCSI_CONN_UP) {
+                       err = transport->set_param(conn, ev->u.set_param.param,
+                                       data, ev->u.set_param.len);
+               } else {
+@@ -3781,7 +3786,7 @@ static int iscsi_if_transport_conn(struc
+                                               ev->u.b_conn.transport_eph,
+                                               ev->u.b_conn.is_leading);
+               if (!ev->r.retcode)
+-                      conn->state = ISCSI_CONN_BOUND;
++                      WRITE_ONCE(conn->state, ISCSI_CONN_BOUND);
+ 
+               if (ev->r.retcode || !transport->ep_connect)
+                       break;
+@@ -3800,7 +3805,8 @@ static int iscsi_if_transport_conn(struc
+       case ISCSI_UEVENT_START_CONN:
+               ev->r.retcode = transport->start_conn(conn);
+               if (!ev->r.retcode)
+-                      conn->state = ISCSI_CONN_UP;
++                      WRITE_ONCE(conn->state, ISCSI_CONN_UP);
++
+               break;
+       case ISCSI_UEVENT_SEND_PDU:
+               pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev);
+@@ -4108,10 +4114,11 @@ static ssize_t show_conn_state(struct de
+ {
+       struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent);
+       const char *state = "unknown";
++      int conn_state = READ_ONCE(conn->state);
+ 
+-      if (conn->state >= 0 &&
+-          conn->state < ARRAY_SIZE(connection_state_names))
+-              state = connection_state_names[conn->state];
++      if (conn_state >= 0 &&
++          conn_state < ARRAY_SIZE(connection_state_names))
++              state = connection_state_names[conn_state];
+ 
+       return sysfs_emit(buf, "%s\n", state);
+ }
diff --git a/queue-5.10/series b/queue-5.10/series

index 9c93c5e9ecb7d940559662fe63c966d31a0fe7c4..8ab1895a0a0214fd7e40ecd26d01f7d0775b3082 100644 (file)
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -93,3 +93,5 @@ smp-fix-offline-cpu-check-in-flush_smp_call_function_queue.patch
  i2c-pasemi-wait-for-write-xfers-to-finish.patch
  timers-fix-warning-condition-in-__run_timers.patch
  dma-direct-avoid-redundant-memory-sync-for-swiotlb.patch
+scsi-iscsi-fix-endpoint-reuse-regression.patch
+scsi-iscsi-fix-unbound-endpoint-error-handling.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 18 Apr 2022 10:19:03 +0000 (12:19 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 18 Apr 2022 10:19:03 +0000 (12:19 +0200)
queue-5.10/scsi-iscsi-fix-endpoint-reuse-regression.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/scsi-iscsi-fix-unbound-endpoint-error-handling.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/series		patch \| blob \| blame \| history