git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.14-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 7 May 2025 13:57:49 +0000 (15:57 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 7 May 2025 13:57:49 +0000 (15:57 +0200)
added patches:
ublk-add-helper-of-ublk_need_map_io.patch
ublk-fix-race-between-io_uring_cmd_complete_in_task-and-ublk_cancel_cmd.patch
ublk-improve-detection-and-handling-of-ublk-server-exit.patch
ublk-move-device-reset-into-ublk_ch_release.patch
ublk-properly-serialize-all-fetch_reqs.patch
ublk-remove-__ublk_quiesce_dev.patch
ublk-simplify-aborting-ublk-request.patch

queue-6.14/series
queue-6.14/ublk-add-helper-of-ublk_need_map_io.patch [new file with mode: 0644]
queue-6.14/ublk-fix-race-between-io_uring_cmd_complete_in_task-and-ublk_cancel_cmd.patch [new file with mode: 0644]
queue-6.14/ublk-improve-detection-and-handling-of-ublk-server-exit.patch [new file with mode: 0644]
queue-6.14/ublk-move-device-reset-into-ublk_ch_release.patch [new file with mode: 0644]
queue-6.14/ublk-properly-serialize-all-fetch_reqs.patch [new file with mode: 0644]
queue-6.14/ublk-remove-__ublk_quiesce_dev.patch [new file with mode: 0644]
queue-6.14/ublk-simplify-aborting-ublk-request.patch [new file with mode: 0644]

index d041989b06bdc7a8a6f8e3377732e4f69842d87c..a954126f3573088c7eb01b23f95ff2896885afae 100644 (file)
@@ -151,3 +151,10 @@ accel-ivpu-fix-locking-order-in-ivpu_job_submit.patch
 accel-ivpu-add-handling-of-vpu_jsm_status_mvnci_context_violation_hw.patch
 drm-xe-invalidate-l3-read-only-cachelines-for-geometry-streams-too.patch
 platform-x86-alienware-wmi-wmax-add-support-for-alienware-m15-r7.patch
+ublk-add-helper-of-ublk_need_map_io.patch
+ublk-properly-serialize-all-fetch_reqs.patch
+ublk-move-device-reset-into-ublk_ch_release.patch
+ublk-improve-detection-and-handling-of-ublk-server-exit.patch
+ublk-remove-__ublk_quiesce_dev.patch
+ublk-simplify-aborting-ublk-request.patch
+ublk-fix-race-between-io_uring_cmd_complete_in_task-and-ublk_cancel_cmd.patch
diff --git a/queue-6.14/ublk-add-helper-of-ublk_need_map_io.patch b/queue-6.14/ublk-add-helper-of-ublk_need_map_io.patch
new file mode 100644 (file)
index 0000000..d913d90
--- /dev/null
@@ -0,0 +1,73 @@
+From stable+bounces-142018-greg=kroah.com@vger.kernel.org Wed May  7 11:47:26 2025
+From: Jared Holzman <jholzman@nvidia.com>
+Date: Wed,  7 May 2025 12:46:56 +0300
+Subject: ublk: add helper of ublk_need_map_io()
+To: stable@vger.kernel.org
+Cc: ming.lei@redhat.com, axboe@kernel.dk, ushankar@purestorage.com, gregkh@linuxfoundation.org, jholzman@nvidia.com
+Message-ID: <20250507094702.73459-2-jholzman@nvidia.com>
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit 1d781c0de08c0b35948ad4aaf609a4cc9995d9f6 ]
+
+ublk_need_map_io() is more readable.
+
+Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Link: https://lore.kernel.org/r/20250327095123.179113-5-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/ublk_drv.c |   13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -594,6 +594,11 @@ static inline bool ublk_support_user_cop
+       return ubq->flags & UBLK_F_USER_COPY;
+ }
++static inline bool ublk_need_map_io(const struct ublk_queue *ubq)
++{
++      return !ublk_support_user_copy(ubq);
++}
++
+ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
+ {
+       /*
+@@ -921,7 +926,7 @@ static int ublk_map_io(const struct ublk
+ {
+       const unsigned int rq_bytes = blk_rq_bytes(req);
+-      if (ublk_support_user_copy(ubq))
++      if (!ublk_need_map_io(ubq))
+               return rq_bytes;
+       /*
+@@ -945,7 +950,7 @@ static int ublk_unmap_io(const struct ub
+ {
+       const unsigned int rq_bytes = blk_rq_bytes(req);
+-      if (ublk_support_user_copy(ubq))
++      if (!ublk_need_map_io(ubq))
+               return rq_bytes;
+       if (ublk_need_unmap_req(req)) {
+@@ -1914,7 +1919,7 @@ static int __ublk_ch_uring_cmd(struct io
+               if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
+                       goto out;
+-              if (!ublk_support_user_copy(ubq)) {
++              if (ublk_need_map_io(ubq)) {
+                       /*
+                        * FETCH_RQ has to provide IO buffer if NEED GET
+                        * DATA is not enabled
+@@ -1936,7 +1941,7 @@ static int __ublk_ch_uring_cmd(struct io
+               if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
+                       goto out;
+-              if (!ublk_support_user_copy(ubq)) {
++              if (ublk_need_map_io(ubq)) {
+                       /*
+                        * COMMIT_AND_FETCH_REQ has to provide IO buffer if
+                        * NEED GET DATA is not enabled or it is Read IO.
diff --git a/queue-6.14/ublk-fix-race-between-io_uring_cmd_complete_in_task-and-ublk_cancel_cmd.patch b/queue-6.14/ublk-fix-race-between-io_uring_cmd_complete_in_task-and-ublk_cancel_cmd.patch
new file mode 100644 (file)
index 0000000..bf032bf
--- /dev/null
@@ -0,0 +1,95 @@
+From stable+bounces-142024-greg=kroah.com@vger.kernel.org Wed May  7 11:47:52 2025
+From: Jared Holzman <jholzman@nvidia.com>
+Date: Wed,  7 May 2025 12:47:02 +0300
+Subject: ublk: fix race between io_uring_cmd_complete_in_task and ublk_cancel_cmd
+To: stable@vger.kernel.org
+Cc: ming.lei@redhat.com, axboe@kernel.dk, ushankar@purestorage.com, gregkh@linuxfoundation.org, jholzman@nvidia.com
+Message-ID: <20250507094702.73459-8-jholzman@nvidia.com>
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit f40139fde5278d81af3227444fd6e76a76b9506d ]
+
+ublk_cancel_cmd() calls io_uring_cmd_done() to complete the uring_cmd, but
+task work may already have been scheduled via io_uring_cmd_complete_in_task()
+to dispatch the request, in which case a kernel crash can be triggered.
+
+Fix it by not trying to cancel the command if the ublk block request has
+been started.
+
+Fixes: 216c8f5ef0f2 ("ublk: replace monitor with cancelable uring_cmd")
+Reported-by: Jared Holzman <jholzman@nvidia.com>
+Tested-by: Jared Holzman <jholzman@nvidia.com>
+Closes: https://lore.kernel.org/linux-block/d2179120-171b-47ba-b664-23242981ef19@nvidia.com/
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Link: https://lore.kernel.org/r/20250425013742.1079549-3-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/ublk_drv.c |   27 +++++++++++++++++++++------
+ 1 file changed, 21 insertions(+), 6 deletions(-)
+
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -1655,14 +1655,31 @@ static void ublk_start_cancel(struct ubl
+       ublk_put_disk(disk);
+ }
+-static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io,
++static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag,
+               unsigned int issue_flags)
+ {
++      struct ublk_io *io = &ubq->ios[tag];
++      struct ublk_device *ub = ubq->dev;
++      struct request *req;
+       bool done;
+       if (!(io->flags & UBLK_IO_FLAG_ACTIVE))
+               return;
++      /*
++       * Don't try to cancel this command if the request is started for
++       * avoiding race between io_uring_cmd_done() and
++       * io_uring_cmd_complete_in_task().
++       *
++       * Either the started request will be aborted via __ublk_abort_rq(),
++       * then this uring_cmd is canceled next time, or it will be done in
++       * task work function ublk_dispatch_req() because io_uring guarantees
++       * that ublk_dispatch_req() is always called
++       */
++      req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
++      if (req && blk_mq_request_started(req))
++              return;
++
+       spin_lock(&ubq->cancel_lock);
+       done = !!(io->flags & UBLK_IO_FLAG_CANCELED);
+       if (!done)
+@@ -1694,7 +1711,6 @@ static void ublk_uring_cmd_cancel_fn(str
+       struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+       struct ublk_queue *ubq = pdu->ubq;
+       struct task_struct *task;
+-      struct ublk_io *io;
+       if (WARN_ON_ONCE(!ubq))
+               return;
+@@ -1709,9 +1725,8 @@ static void ublk_uring_cmd_cancel_fn(str
+       if (!ubq->canceling)
+               ublk_start_cancel(ubq);
+-      io = &ubq->ios[pdu->tag];
+-      WARN_ON_ONCE(io->cmd != cmd);
+-      ublk_cancel_cmd(ubq, io, issue_flags);
++      WARN_ON_ONCE(ubq->ios[pdu->tag].cmd != cmd);
++      ublk_cancel_cmd(ubq, pdu->tag, issue_flags);
+ }
+ static inline bool ublk_queue_ready(struct ublk_queue *ubq)
+@@ -1724,7 +1739,7 @@ static void ublk_cancel_queue(struct ubl
+       int i;
+       for (i = 0; i < ubq->q_depth; i++)
+-              ublk_cancel_cmd(ubq, &ubq->ios[i], IO_URING_F_UNLOCKED);
++              ublk_cancel_cmd(ubq, i, IO_URING_F_UNLOCKED);
+ }
+ /* Cancel all pending commands, must be called after del_gendisk() returns */
diff --git a/queue-6.14/ublk-improve-detection-and-handling-of-ublk-server-exit.patch b/queue-6.14/ublk-improve-detection-and-handling-of-ublk-server-exit.patch
new file mode 100644 (file)
index 0000000..a622012
--- /dev/null
@@ -0,0 +1,459 @@
+From stable+bounces-142021-greg=kroah.com@vger.kernel.org Wed May  7 11:47:41 2025
+From: Jared Holzman <jholzman@nvidia.com>
+Date: Wed,  7 May 2025 12:46:59 +0300
+Subject: ublk: improve detection and handling of ublk server exit
+To: stable@vger.kernel.org
+Cc: ming.lei@redhat.com, axboe@kernel.dk, ushankar@purestorage.com, gregkh@linuxfoundation.org, jholzman@nvidia.com
+Message-ID: <20250507094702.73459-5-jholzman@nvidia.com>
+
+From: Uday Shankar <ushankar@purestorage.com>
+
+[ Upstream commit 82a8a30c581bbbe653d33c6ce2ef67e3072c7f12 ]
+
+There are currently two ways in which ublk server exit is detected by
+ublk_drv:
+
+1. uring_cmd cancellation. If there are any outstanding uring_cmds which
+   have not been completed to the ublk server when it exits, io_uring
+   calls the uring_cmd callback with a special cancellation flag as the
+   issuing task is exiting.
+2. I/O timeout. This is needed in addition to the above to handle the
+   "saturated queue" case, when all I/Os for a given queue are in the
+   ublk server, and therefore there are no outstanding uring_cmds to
+   cancel when the ublk server exits.
+
+There are a couple of issues with this approach:
+
+- It is complex and inelegant to have two methods to detect the same
+  condition
+- The second method detects ublk server exit only after a long delay
+  (~30s, the default timeout assigned by the block layer). This delays
+  the nosrv behavior from kicking in, and with it any potential
+  subsequent recovery of the device.
+
+The second issue is brought to light by the new test_generic_06, which
+will be added in a following patch. It fails before this fix:
+
+selftests: ublk: test_generic_06.sh
+dev id is 0
+dd: error writing '/dev/ublkb0': Input/output error
+1+0 records in
+0+0 records out
+0 bytes copied, 30.0611 s, 0.0 kB/s
+DEAD
+dd took 31 seconds to exit (>= 5s tolerance)!
+generic_06 : [FAIL]
+
+Fix this by instead detecting and handling ublk server exit in the
+character file release callback. This has several advantages:
+
+- This one place can handle both saturated and unsaturated queues. Thus,
+  it replaces both preexisting methods of detecting ublk server exit.
+- It runs quickly on ublk server exit - there is no 30s delay.
+- It starts the process of removing task references in ublk_drv. This is
+  needed if we want to relax restrictions in the driver, such as the
+  requirement that only one thread serve each queue.
+
+There is also the disadvantage that the character file release callback
+can be triggered by an intentional close of the file, which is a
+significant behavior change.
+dependent on the ability to open/close the file multiple times. To
+address this, only transition to a nosrv state if the file is released
+while the ublk device is live. This allows for programs to open/close
+the file multiple times during setup. It is still a behavior change if a
+ublk server decides to close/reopen the file while the device is LIVE
+(i.e. while it is responsible for serving I/O), but that would be highly
+unusual. This behavior is in line with what is done by FUSE, which is
+very similar to ublk in that a userspace daemon is providing services
+traditionally provided by the kernel.
+
+With this change in, the new test (and all other selftests, and all
+ublksrv tests) pass:
+
+selftests: ublk: test_generic_06.sh
+dev id is 0
+dd: error writing '/dev/ublkb0': Input/output error
+1+0 records in
+0+0 records out
+0 bytes copied, 0.0376731 s, 0.0 kB/s
+DEAD
+generic_04 : [PASS]
+
+Signed-off-by: Uday Shankar <ushankar@purestorage.com>
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Link: https://lore.kernel.org/r/20250416035444.99569-6-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/ublk_drv.c |  223 ++++++++++++++++++++++++++---------------------
+ 1 file changed, 124 insertions(+), 99 deletions(-)
+
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -194,8 +194,6 @@ struct ublk_device {
+       struct completion       completion;
+       unsigned int            nr_queues_ready;
+       unsigned int            nr_privileged_daemon;
+-
+-      struct work_struct      nosrv_work;
+ };
+ /* header of ublk_params */
+@@ -204,7 +202,10 @@ struct ublk_params_header {
+       __u32   types;
+ };
+-static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq);
++
++static void ublk_stop_dev_unlocked(struct ublk_device *ub);
++static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq);
++static void __ublk_quiesce_dev(struct ublk_device *ub);
+ static inline unsigned int ublk_req_build_flags(struct request *req);
+ static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
+@@ -1306,8 +1307,6 @@ static void ublk_queue_cmd_list(struct u
+ static enum blk_eh_timer_return ublk_timeout(struct request *rq)
+ {
+       struct ublk_queue *ubq = rq->mq_hctx->driver_data;
+-      unsigned int nr_inflight = 0;
+-      int i;
+       if (ubq->flags & UBLK_F_UNPRIVILEGED_DEV) {
+               if (!ubq->timeout) {
+@@ -1318,26 +1317,6 @@ static enum blk_eh_timer_return ublk_tim
+               return BLK_EH_DONE;
+       }
+-      if (!ubq_daemon_is_dying(ubq))
+-              return BLK_EH_RESET_TIMER;
+-
+-      for (i = 0; i < ubq->q_depth; i++) {
+-              struct ublk_io *io = &ubq->ios[i];
+-
+-              if (!(io->flags & UBLK_IO_FLAG_ACTIVE))
+-                      nr_inflight++;
+-      }
+-
+-      /* cancelable uring_cmd can't help us if all commands are in-flight */
+-      if (nr_inflight == ubq->q_depth) {
+-              struct ublk_device *ub = ubq->dev;
+-
+-              if (ublk_abort_requests(ub, ubq)) {
+-                      schedule_work(&ub->nosrv_work);
+-              }
+-              return BLK_EH_DONE;
+-      }
+-
+       return BLK_EH_RESET_TIMER;
+ }
+@@ -1495,13 +1474,105 @@ static void ublk_reset_ch_dev(struct ubl
+       ub->nr_privileged_daemon = 0;
+ }
++static struct gendisk *ublk_get_disk(struct ublk_device *ub)
++{
++      struct gendisk *disk;
++
++      spin_lock(&ub->lock);
++      disk = ub->ub_disk;
++      if (disk)
++              get_device(disk_to_dev(disk));
++      spin_unlock(&ub->lock);
++
++      return disk;
++}
++
++static void ublk_put_disk(struct gendisk *disk)
++{
++      if (disk)
++              put_device(disk_to_dev(disk));
++}
++
+ static int ublk_ch_release(struct inode *inode, struct file *filp)
+ {
+       struct ublk_device *ub = filp->private_data;
++      struct gendisk *disk;
++      int i;
++
++      /*
++       * disk isn't attached yet, either device isn't live, or it has
++       * been removed already, so we needn't to do anything
++       */
++      disk = ublk_get_disk(ub);
++      if (!disk)
++              goto out;
++
++      /*
++       * All uring_cmd are done now, so abort any request outstanding to
++       * the ublk server
++       *
++       * This can be done in lockless way because ublk server has been
++       * gone
++       *
++       * More importantly, we have to provide forward progress guarantee
++       * without holding ub->mutex, otherwise control task grabbing
++       * ub->mutex triggers deadlock
++       *
++       * All requests may be inflight, so ->canceling may not be set, set
++       * it now.
++       */
++      for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
++              struct ublk_queue *ubq = ublk_get_queue(ub, i);
++
++              ubq->canceling = true;
++              ublk_abort_queue(ub, ubq);
++      }
++      blk_mq_kick_requeue_list(disk->queue);
++
++      /*
++       * All infligh requests have been completed or requeued and any new
++       * request will be failed or requeued via `->canceling` now, so it is
++       * fine to grab ub->mutex now.
++       */
++      mutex_lock(&ub->mutex);
++
++      /* double check after grabbing lock */
++      if (!ub->ub_disk)
++              goto unlock;
++
++      /*
++       * Transition the device to the nosrv state. What exactly this
++       * means depends on the recovery flags
++       */
++      blk_mq_quiesce_queue(disk->queue);
++      if (ublk_nosrv_should_stop_dev(ub)) {
++              /*
++               * Allow any pending/future I/O to pass through quickly
++               * with an error. This is needed because del_gendisk
++               * waits for all pending I/O to complete
++               */
++              for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
++                      ublk_get_queue(ub, i)->force_abort = true;
++              blk_mq_unquiesce_queue(disk->queue);
++
++              ublk_stop_dev_unlocked(ub);
++      } else {
++              if (ublk_nosrv_dev_should_queue_io(ub)) {
++                      __ublk_quiesce_dev(ub);
++              } else {
++                      ub->dev_info.state = UBLK_S_DEV_FAIL_IO;
++                      for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
++                              ublk_get_queue(ub, i)->fail_io = true;
++              }
++              blk_mq_unquiesce_queue(disk->queue);
++      }
++unlock:
++      mutex_unlock(&ub->mutex);
++      ublk_put_disk(disk);
+       /* all uring_cmd has been done now, reset device & ubq */
+       ublk_reset_ch_dev(ub);
+-
++out:
+       clear_bit(UB_STATE_OPEN, &ub->state);
+       return 0;
+ }
+@@ -1597,37 +1668,22 @@ static void ublk_abort_queue(struct ublk
+ }
+ /* Must be called when queue is frozen */
+-static bool ublk_mark_queue_canceling(struct ublk_queue *ubq)
++static void ublk_mark_queue_canceling(struct ublk_queue *ubq)
+ {
+-      bool canceled;
+-
+       spin_lock(&ubq->cancel_lock);
+-      canceled = ubq->canceling;
+-      if (!canceled)
++      if (!ubq->canceling)
+               ubq->canceling = true;
+       spin_unlock(&ubq->cancel_lock);
+-
+-      return canceled;
+ }
+-static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq)
++static void ublk_start_cancel(struct ublk_queue *ubq)
+ {
+-      bool was_canceled = ubq->canceling;
+-      struct gendisk *disk;
+-
+-      if (was_canceled)
+-              return false;
+-
+-      spin_lock(&ub->lock);
+-      disk = ub->ub_disk;
+-      if (disk)
+-              get_device(disk_to_dev(disk));
+-      spin_unlock(&ub->lock);
++      struct ublk_device *ub = ubq->dev;
++      struct gendisk *disk = ublk_get_disk(ub);
+       /* Our disk has been dead */
+       if (!disk)
+-              return false;
+-
++              return;
+       /*
+        * Now we are serialized with ublk_queue_rq()
+        *
+@@ -1636,15 +1692,9 @@ static bool ublk_abort_requests(struct u
+        * touch completed uring_cmd
+        */
+       blk_mq_quiesce_queue(disk->queue);
+-      was_canceled = ublk_mark_queue_canceling(ubq);
+-      if (!was_canceled) {
+-              /* abort queue is for making forward progress */
+-              ublk_abort_queue(ub, ubq);
+-      }
++      ublk_mark_queue_canceling(ubq);
+       blk_mq_unquiesce_queue(disk->queue);
+-      put_device(disk_to_dev(disk));
+-
+-      return !was_canceled;
++      ublk_put_disk(disk);
+ }
+ static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io,
+@@ -1668,6 +1718,17 @@ static void ublk_cancel_cmd(struct ublk_
+ /*
+  * The ublk char device won't be closed when calling cancel fn, so both
+  * ublk device and queue are guaranteed to be live
++ *
++ * Two-stage cancel:
++ *
++ * - make every active uring_cmd done in ->cancel_fn()
++ *
++ * - aborting inflight ublk IO requests in ublk char device release handler,
++ *   which depends on 1st stage because device can only be closed iff all
++ *   uring_cmd are done
++ *
++ * Do _not_ try to acquire ub->mutex before all inflight requests are
++ * aborted, otherwise deadlock may be caused.
+  */
+ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
+               unsigned int issue_flags)
+@@ -1675,8 +1736,6 @@ static void ublk_uring_cmd_cancel_fn(str
+       struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+       struct ublk_queue *ubq = pdu->ubq;
+       struct task_struct *task;
+-      struct ublk_device *ub;
+-      bool need_schedule;
+       struct ublk_io *io;
+       if (WARN_ON_ONCE(!ubq))
+@@ -1689,16 +1748,12 @@ static void ublk_uring_cmd_cancel_fn(str
+       if (WARN_ON_ONCE(task && task != ubq->ubq_daemon))
+               return;
+-      ub = ubq->dev;
+-      need_schedule = ublk_abort_requests(ub, ubq);
++      if (!ubq->canceling)
++              ublk_start_cancel(ubq);
+       io = &ubq->ios[pdu->tag];
+       WARN_ON_ONCE(io->cmd != cmd);
+       ublk_cancel_cmd(ubq, io, issue_flags);
+-
+-      if (need_schedule) {
+-              schedule_work(&ub->nosrv_work);
+-      }
+ }
+ static inline bool ublk_queue_ready(struct ublk_queue *ubq)
+@@ -1757,13 +1812,11 @@ static void __ublk_quiesce_dev(struct ub
+                       __func__, ub->dev_info.dev_id,
+                       ub->dev_info.state == UBLK_S_DEV_LIVE ?
+                       "LIVE" : "QUIESCED");
+-      blk_mq_quiesce_queue(ub->ub_disk->queue);
+       /* mark every queue as canceling */
+       for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
+               ublk_get_queue(ub, i)->canceling = true;
+       ublk_wait_tagset_rqs_idle(ub);
+       ub->dev_info.state = UBLK_S_DEV_QUIESCED;
+-      blk_mq_unquiesce_queue(ub->ub_disk->queue);
+ }
+ static void ublk_force_abort_dev(struct ublk_device *ub)
+@@ -1800,50 +1853,25 @@ static struct gendisk *ublk_detach_disk(
+       return disk;
+ }
+-static void ublk_stop_dev(struct ublk_device *ub)
++static void ublk_stop_dev_unlocked(struct ublk_device *ub)
++      __must_hold(&ub->mutex)
+ {
+       struct gendisk *disk;
+-      mutex_lock(&ub->mutex);
+       if (ub->dev_info.state == UBLK_S_DEV_DEAD)
+-              goto unlock;
++              return;
++
+       if (ublk_nosrv_dev_should_queue_io(ub))
+               ublk_force_abort_dev(ub);
+       del_gendisk(ub->ub_disk);
+       disk = ublk_detach_disk(ub);
+       put_disk(disk);
+- unlock:
+-      mutex_unlock(&ub->mutex);
+-      ublk_cancel_dev(ub);
+ }
+-static void ublk_nosrv_work(struct work_struct *work)
++static void ublk_stop_dev(struct ublk_device *ub)
+ {
+-      struct ublk_device *ub =
+-              container_of(work, struct ublk_device, nosrv_work);
+-      int i;
+-
+-      if (ublk_nosrv_should_stop_dev(ub)) {
+-              ublk_stop_dev(ub);
+-              return;
+-      }
+-
+       mutex_lock(&ub->mutex);
+-      if (ub->dev_info.state != UBLK_S_DEV_LIVE)
+-              goto unlock;
+-
+-      if (ublk_nosrv_dev_should_queue_io(ub)) {
+-              __ublk_quiesce_dev(ub);
+-      } else {
+-              blk_mq_quiesce_queue(ub->ub_disk->queue);
+-              ub->dev_info.state = UBLK_S_DEV_FAIL_IO;
+-              for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
+-                      ublk_get_queue(ub, i)->fail_io = true;
+-              }
+-              blk_mq_unquiesce_queue(ub->ub_disk->queue);
+-      }
+-
+- unlock:
++      ublk_stop_dev_unlocked(ub);
+       mutex_unlock(&ub->mutex);
+       ublk_cancel_dev(ub);
+ }
+@@ -2419,7 +2447,6 @@ static int ublk_add_tag_set(struct ublk_
+ static void ublk_remove(struct ublk_device *ub)
+ {
+       ublk_stop_dev(ub);
+-      cancel_work_sync(&ub->nosrv_work);
+       cdev_device_del(&ub->cdev, &ub->cdev_dev);
+       ublk_put_device(ub);
+       ublks_added--;
+@@ -2693,7 +2720,6 @@ static int ublk_ctrl_add_dev(struct io_u
+               goto out_unlock;
+       mutex_init(&ub->mutex);
+       spin_lock_init(&ub->lock);
+-      INIT_WORK(&ub->nosrv_work, ublk_nosrv_work);
+       ret = ublk_alloc_dev_number(ub, header->dev_id);
+       if (ret < 0)
+@@ -2828,7 +2854,6 @@ static inline void ublk_ctrl_cmd_dump(st
+ static int ublk_ctrl_stop_dev(struct ublk_device *ub)
+ {
+       ublk_stop_dev(ub);
+-      cancel_work_sync(&ub->nosrv_work);
+       return 0;
+ }
diff --git a/queue-6.14/ublk-move-device-reset-into-ublk_ch_release.patch b/queue-6.14/ublk-move-device-reset-into-ublk_ch_release.patch
new file mode 100644 (file)
index 0000000..34f609d
--- /dev/null
@@ -0,0 +1,248 @@
+From stable+bounces-142020-greg=kroah.com@vger.kernel.org Wed May  7 11:47:39 2025
+From: Jared Holzman <jholzman@nvidia.com>
+Date: Wed,  7 May 2025 12:46:58 +0300
+Subject: ublk: move device reset into ublk_ch_release()
+To: stable@vger.kernel.org
+Cc: ming.lei@redhat.com, axboe@kernel.dk, ushankar@purestorage.com, gregkh@linuxfoundation.org, jholzman@nvidia.com
+Message-ID: <20250507094702.73459-4-jholzman@nvidia.com>
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit 728cbac5fe219d3b8a21a0688a08f2b7f8aeda2b ]
+
+ublk_ch_release() is called after the ublk char device is closed, when all
+uring_cmds are done, so it is perfectly fine to move the ublk device reset
+from ublk_ctrl_start_recovery() to ublk_ch_release().
+
+This avoids holding the exiting daemon's task_struct for too long.
+
+However, resetting the following ublk IO flags has to be deferred until the
+ublk io_uring queues are ready:
+
+- ubq->canceling
+
+For requeuing IO in case of ublk_nosrv_dev_should_queue_io() before the
+device is recovered
+
+- ubq->fail_io
+
+For failing IO in case of UBLK_F_USER_RECOVERY_FAIL_IO before the device
+is recovered
+
+- ublk_io->flags
+
+For preventing use of io->cmd
+
+This way, recovery is simplified a lot.
+
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Link: https://lore.kernel.org/r/20250416035444.99569-5-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/ublk_drv.c |  121 +++++++++++++++++++++++++++--------------------
+ 1 file changed, 72 insertions(+), 49 deletions(-)
+
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -1043,7 +1043,7 @@ static inline struct ublk_uring_cmd_pdu
+ static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq)
+ {
+-      return ubq->ubq_daemon->flags & PF_EXITING;
++      return !ubq->ubq_daemon || ubq->ubq_daemon->flags & PF_EXITING;
+ }
+ /* todo: handle partial completion */
+@@ -1440,6 +1440,37 @@ static const struct blk_mq_ops ublk_mq_o
+       .timeout        = ublk_timeout,
+ };
++static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
++{
++      int i;
++
++      /* All old ioucmds have to be completed */
++      ubq->nr_io_ready = 0;
++
++      /*
++       * old daemon is PF_EXITING, put it now
++       *
++       * It could be NULL in case of closing one quisced device.
++       */
++      if (ubq->ubq_daemon)
++              put_task_struct(ubq->ubq_daemon);
++      /* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
++      ubq->ubq_daemon = NULL;
++      ubq->timeout = false;
++
++      for (i = 0; i < ubq->q_depth; i++) {
++              struct ublk_io *io = &ubq->ios[i];
++
++              /*
++               * UBLK_IO_FLAG_CANCELED is kept for avoiding to touch
++               * io->cmd
++               */
++              io->flags &= UBLK_IO_FLAG_CANCELED;
++              io->cmd = NULL;
++              io->addr = 0;
++      }
++}
++
+ static int ublk_ch_open(struct inode *inode, struct file *filp)
+ {
+       struct ublk_device *ub = container_of(inode->i_cdev,
+@@ -1451,10 +1482,26 @@ static int ublk_ch_open(struct inode *in
+       return 0;
+ }
++static void ublk_reset_ch_dev(struct ublk_device *ub)
++{
++      int i;
++
++      for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
++              ublk_queue_reinit(ub, ublk_get_queue(ub, i));
++
++      /* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */
++      ub->mm = NULL;
++      ub->nr_queues_ready = 0;
++      ub->nr_privileged_daemon = 0;
++}
++
+ static int ublk_ch_release(struct inode *inode, struct file *filp)
+ {
+       struct ublk_device *ub = filp->private_data;
++      /* all uring_cmd has been done now, reset device & ubq */
++      ublk_reset_ch_dev(ub);
++
+       clear_bit(UB_STATE_OPEN, &ub->state);
+       return 0;
+ }
+@@ -1801,6 +1848,24 @@ static void ublk_nosrv_work(struct work_
+       ublk_cancel_dev(ub);
+ }
++/* reset ublk io_uring queue & io flags */
++static void ublk_reset_io_flags(struct ublk_device *ub)
++{
++      int i, j;
++
++      for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
++              struct ublk_queue *ubq = ublk_get_queue(ub, i);
++
++              /* UBLK_IO_FLAG_CANCELED can be cleared now */
++              spin_lock(&ubq->cancel_lock);
++              for (j = 0; j < ubq->q_depth; j++)
++                      ubq->ios[j].flags &= ~UBLK_IO_FLAG_CANCELED;
++              spin_unlock(&ubq->cancel_lock);
++              ubq->canceling = false;
++              ubq->fail_io = false;
++      }
++}
++
+ /* device can only be started after all IOs are ready */
+ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
+       __must_hold(&ub->mutex)
+@@ -1814,8 +1879,12 @@ static void ublk_mark_io_ready(struct ub
+               if (capable(CAP_SYS_ADMIN))
+                       ub->nr_privileged_daemon++;
+       }
+-      if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues)
++
++      if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) {
++              /* now we are ready for handling ublk io request */
++              ublk_reset_io_flags(ub);
+               complete_all(&ub->completion);
++      }
+ }
+ static inline int ublk_check_cmd_op(u32 cmd_op)
+@@ -2866,42 +2935,15 @@ static int ublk_ctrl_set_params(struct u
+       return ret;
+ }
+-static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
+-{
+-      int i;
+-
+-      WARN_ON_ONCE(!(ubq->ubq_daemon && ubq_daemon_is_dying(ubq)));
+-
+-      /* All old ioucmds have to be completed */
+-      ubq->nr_io_ready = 0;
+-      /* old daemon is PF_EXITING, put it now */
+-      put_task_struct(ubq->ubq_daemon);
+-      /* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
+-      ubq->ubq_daemon = NULL;
+-      ubq->timeout = false;
+-
+-      for (i = 0; i < ubq->q_depth; i++) {
+-              struct ublk_io *io = &ubq->ios[i];
+-
+-              /* forget everything now and be ready for new FETCH_REQ */
+-              io->flags = 0;
+-              io->cmd = NULL;
+-              io->addr = 0;
+-      }
+-}
+-
+ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
+               struct io_uring_cmd *cmd)
+ {
+       const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
+       int ret = -EINVAL;
+-      int i;
+       mutex_lock(&ub->mutex);
+       if (ublk_nosrv_should_stop_dev(ub))
+               goto out_unlock;
+-      if (!ub->nr_queues_ready)
+-              goto out_unlock;
+       /*
+        * START_RECOVERY is only allowd after:
+        *
+@@ -2925,12 +2967,6 @@ static int ublk_ctrl_start_recovery(stru
+               goto out_unlock;
+       }
+       pr_devel("%s: start recovery for dev id %d.\n", __func__, header->dev_id);
+-      for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
+-              ublk_queue_reinit(ub, ublk_get_queue(ub, i));
+-      /* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */
+-      ub->mm = NULL;
+-      ub->nr_queues_ready = 0;
+-      ub->nr_privileged_daemon = 0;
+       init_completion(&ub->completion);
+       ret = 0;
+  out_unlock:
+@@ -2944,7 +2980,6 @@ static int ublk_ctrl_end_recovery(struct
+       const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
+       int ublksrv_pid = (int)header->data[0];
+       int ret = -EINVAL;
+-      int i;
+       pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n",
+                       __func__, ub->dev_info.nr_hw_queues, header->dev_id);
+@@ -2964,22 +2999,10 @@ static int ublk_ctrl_end_recovery(struct
+               goto out_unlock;
+       }
+       ub->dev_info.ublksrv_pid = ublksrv_pid;
++      ub->dev_info.state = UBLK_S_DEV_LIVE;
+       pr_devel("%s: new ublksrv_pid %d, dev id %d\n",
+                       __func__, ublksrv_pid, header->dev_id);
+-
+-      blk_mq_quiesce_queue(ub->ub_disk->queue);
+-      ub->dev_info.state = UBLK_S_DEV_LIVE;
+-      for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
+-              struct ublk_queue *ubq = ublk_get_queue(ub, i);
+-
+-              ubq->canceling = false;
+-              ubq->fail_io = false;
+-      }
+-      blk_mq_unquiesce_queue(ub->ub_disk->queue);
+-      pr_devel("%s: queue unquiesced, dev id %d.\n",
+-                      __func__, header->dev_id);
+       blk_mq_kick_requeue_list(ub->ub_disk->queue);
+-
+       ret = 0;
+  out_unlock:
+       mutex_unlock(&ub->mutex);
diff --git a/queue-6.14/ublk-properly-serialize-all-fetch_reqs.patch b/queue-6.14/ublk-properly-serialize-all-fetch_reqs.patch
new file mode 100644 (file)
index 0000000..6e0db17
--- /dev/null
@@ -0,0 +1,142 @@
+From stable+bounces-142019-greg=kroah.com@vger.kernel.org Wed May  7 11:47:35 2025
+From: Jared Holzman <jholzman@nvidia.com>
+Date: Wed,  7 May 2025 12:46:57 +0300
+Subject: ublk: properly serialize all FETCH_REQs
+To: stable@vger.kernel.org
+Cc: ming.lei@redhat.com, axboe@kernel.dk, ushankar@purestorage.com, gregkh@linuxfoundation.org, jholzman@nvidia.com
+Message-ID: <20250507094702.73459-3-jholzman@nvidia.com>
+
+From: Uday Shankar <ushankar@purestorage.com>
+
+[ Upstream commit b69b8edfb27dfa563cd53f590ec42b481f9eb174 ]
+
+Most uring_cmds issued against ublk character devices are serialized
+because each command affects only one queue, and there is an early check
+which only allows a single task (the queue's ubq_daemon) to issue
+uring_cmds against that queue. However, this mechanism does not work for
+FETCH_REQs, since they are expected before ubq_daemon is set. Since
+FETCH_REQs are only used at initialization and not in the fast path,
+serialize them using the per-ublk-device mutex. This fixes a number of
+data races that were previously possible if a badly behaved ublk server
+decided to issue multiple FETCH_REQs against the same qid/tag
+concurrently.
+
+Reported-by: Caleb Sander Mateos <csander@purestorage.com>
+Signed-off-by: Uday Shankar <ushankar@purestorage.com>
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Link: https://lore.kernel.org/r/20250416035444.99569-2-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/ublk_drv.c |   77 +++++++++++++++++++++++++++++------------------
+ 1 file changed, 49 insertions(+), 28 deletions(-)
+
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -1803,8 +1803,8 @@ static void ublk_nosrv_work(struct work_
+ /* device can only be started after all IOs are ready */
+ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
++      __must_hold(&ub->mutex)
+ {
+-      mutex_lock(&ub->mutex);
+       ubq->nr_io_ready++;
+       if (ublk_queue_ready(ubq)) {
+               ubq->ubq_daemon = current;
+@@ -1816,7 +1816,6 @@ static void ublk_mark_io_ready(struct ub
+       }
+       if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues)
+               complete_all(&ub->completion);
+-      mutex_unlock(&ub->mutex);
+ }
+ static inline int ublk_check_cmd_op(u32 cmd_op)
+@@ -1855,6 +1854,52 @@ static inline void ublk_prep_cancel(stru
+       io_uring_cmd_mark_cancelable(cmd, issue_flags);
+ }
++static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
++                    struct ublk_io *io, __u64 buf_addr)
++{
++      struct ublk_device *ub = ubq->dev;
++      int ret = 0;
++
++      /*
++       * When handling FETCH command for setting up ublk uring queue,
++       * ub->mutex is the innermost lock, and we won't block for handling
++       * FETCH, so it is fine even for IO_URING_F_NONBLOCK.
++       */
++      mutex_lock(&ub->mutex);
++      /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
++      if (ublk_queue_ready(ubq)) {
++              ret = -EBUSY;
++              goto out;
++      }
++
++      /* allow each command to be FETCHed at most once */
++      if (io->flags & UBLK_IO_FLAG_ACTIVE) {
++              ret = -EINVAL;
++              goto out;
++      }
++
++      WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV);
++
++      if (ublk_need_map_io(ubq)) {
++              /*
++               * FETCH_RQ has to provide IO buffer if NEED GET
++               * DATA is not enabled
++               */
++              if (!buf_addr && !ublk_need_get_data(ubq))
++                      goto out;
++      } else if (buf_addr) {
++              /* User copy requires addr to be unset */
++              ret = -EINVAL;
++              goto out;
++      }
++
++      ublk_fill_io_cmd(io, cmd, buf_addr);
++      ublk_mark_io_ready(ub, ubq);
++out:
++      mutex_unlock(&ub->mutex);
++      return ret;
++}
++
+ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
+                              unsigned int issue_flags,
+                              const struct ublksrv_io_cmd *ub_cmd)
+@@ -1907,33 +1952,9 @@ static int __ublk_ch_uring_cmd(struct io
+       ret = -EINVAL;
+       switch (_IOC_NR(cmd_op)) {
+       case UBLK_IO_FETCH_REQ:
+-              /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
+-              if (ublk_queue_ready(ubq)) {
+-                      ret = -EBUSY;
+-                      goto out;
+-              }
+-              /*
+-               * The io is being handled by server, so COMMIT_RQ is expected
+-               * instead of FETCH_REQ
+-               */
+-              if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
+-                      goto out;
+-
+-              if (ublk_need_map_io(ubq)) {
+-                      /*
+-                       * FETCH_RQ has to provide IO buffer if NEED GET
+-                       * DATA is not enabled
+-                       */
+-                      if (!ub_cmd->addr && !ublk_need_get_data(ubq))
+-                              goto out;
+-              } else if (ub_cmd->addr) {
+-                      /* User copy requires addr to be unset */
+-                      ret = -EINVAL;
++              ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr);
++              if (ret)
+                       goto out;
+-              }
+-
+-              ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
+-              ublk_mark_io_ready(ub, ubq);
+               break;
+       case UBLK_IO_COMMIT_AND_FETCH_REQ:
+               req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
diff --git a/queue-6.14/ublk-remove-__ublk_quiesce_dev.patch b/queue-6.14/ublk-remove-__ublk_quiesce_dev.patch
new file mode 100644 (file)
index 0000000..59a05c0
--- /dev/null
@@ -0,0 +1,73 @@
+From stable+bounces-142022-greg=kroah.com@vger.kernel.org Wed May  7 11:47:44 2025
+From: Jared Holzman <jholzman@nvidia.com>
+Date: Wed,  7 May 2025 12:47:00 +0300
+Subject: ublk: remove __ublk_quiesce_dev()
+To: stable@vger.kernel.org
+Cc: ming.lei@redhat.com, axboe@kernel.dk, ushankar@purestorage.com, gregkh@linuxfoundation.org, jholzman@nvidia.com
+Message-ID: <20250507094702.73459-6-jholzman@nvidia.com>
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit 736b005b413a172670711ee17cab3c8ccab83223 ]
+
+Remove __ublk_quiesce_dev() and open-code the update of the device state
+to QUIESCED.
+
+We no longer need to drain inflight requests in __ublk_quiesce_dev(),
+because all inflight requests are aborted in the ublk char device release
+handler.
+
+We also no longer need to set ->canceling in __ublk_quiesce_dev(), because
+it is now done unconditionally in ublk_ch_release().
+
+Reviewed-by: Uday Shankar <ushankar@purestorage.com>
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Link: https://lore.kernel.org/r/20250416035444.99569-7-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/ublk_drv.c |   19 ++-----------------
+ 1 file changed, 2 insertions(+), 17 deletions(-)
+
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -205,7 +205,6 @@ struct ublk_params_header {
+ static void ublk_stop_dev_unlocked(struct ublk_device *ub);
+ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq);
+-static void __ublk_quiesce_dev(struct ublk_device *ub);
+ static inline unsigned int ublk_req_build_flags(struct request *req);
+ static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
+@@ -1558,7 +1557,8 @@ static int ublk_ch_release(struct inode
+               ublk_stop_dev_unlocked(ub);
+       } else {
+               if (ublk_nosrv_dev_should_queue_io(ub)) {
+-                      __ublk_quiesce_dev(ub);
++                      /* ->canceling is set and all requests are aborted */
++                      ub->dev_info.state = UBLK_S_DEV_QUIESCED;
+               } else {
+                       ub->dev_info.state = UBLK_S_DEV_FAIL_IO;
+                       for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
+@@ -1804,21 +1804,6 @@ static void ublk_wait_tagset_rqs_idle(st
+       }
+ }
+-static void __ublk_quiesce_dev(struct ublk_device *ub)
+-{
+-      int i;
+-
+-      pr_devel("%s: quiesce ub: dev_id %d state %s\n",
+-                      __func__, ub->dev_info.dev_id,
+-                      ub->dev_info.state == UBLK_S_DEV_LIVE ?
+-                      "LIVE" : "QUIESCED");
+-      /* mark every queue as canceling */
+-      for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
+-              ublk_get_queue(ub, i)->canceling = true;
+-      ublk_wait_tagset_rqs_idle(ub);
+-      ub->dev_info.state = UBLK_S_DEV_QUIESCED;
+-}
+-
+ static void ublk_force_abort_dev(struct ublk_device *ub)
+ {
+       int i;
diff --git a/queue-6.14/ublk-simplify-aborting-ublk-request.patch b/queue-6.14/ublk-simplify-aborting-ublk-request.patch
new file mode 100644 (file)
index 0000000..73193e1
--- /dev/null
@@ -0,0 +1,153 @@
+From stable+bounces-142023-greg=kroah.com@vger.kernel.org Wed May  7 11:47:53 2025
+From: Jared Holzman <jholzman@nvidia.com>
+Date: Wed,  7 May 2025 12:47:01 +0300
+Subject: ublk: simplify aborting ublk request
+To: stable@vger.kernel.org
+Cc: ming.lei@redhat.com, axboe@kernel.dk, ushankar@purestorage.com, gregkh@linuxfoundation.org, jholzman@nvidia.com
+Message-ID: <20250507094702.73459-7-jholzman@nvidia.com>
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit e63d2228ef831af36f963b3ab8604160cfff84c1 ]
+
+Now that ublk_abort_queue() has been moved to the ublk char device release
+handler, and the request queue is "quiesced" because either ->canceling was
+set from the uring_cmd cancel function or all IOs are inflight and can't be
+completed by the ublk server, things become much easier:
+
+- all uring_cmds are done, so we no longer need to mark an io as
+UBLK_IO_FLAG_ABORTED to handle completion from uring_cmd
+
+- the ublk char device is closed, so no one can hold an IO request reference
+any more, and we can simply complete the request or requeue it for ublk_nosrv_should_reissue_outstanding.
+
+Reviewed-by: Uday Shankar <ushankar@purestorage.com>
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Link: https://lore.kernel.org/r/20250416035444.99569-8-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/ublk_drv.c |   82 +++++++++++------------------------------------
+ 1 file changed, 20 insertions(+), 62 deletions(-)
+
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -116,15 +116,6 @@ struct ublk_uring_cmd_pdu {
+ #define UBLK_IO_FLAG_OWNED_BY_SRV 0x02
+ /*
+- * IO command is aborted, so this flag is set in case of
+- * !UBLK_IO_FLAG_ACTIVE.
+- *
+- * After this flag is observed, any pending or new incoming request
+- * associated with this io command will be failed immediately
+- */
+-#define UBLK_IO_FLAG_ABORTED 0x04
+-
+-/*
+  * UBLK_IO_FLAG_NEED_GET_DATA is set because IO command requires
+  * get data buffer address from ublksrv.
+  *
+@@ -1054,12 +1045,6 @@ static inline void __ublk_complete_rq(st
+       unsigned int unmapped_bytes;
+       blk_status_t res = BLK_STS_OK;
+-      /* called from ublk_abort_queue() code path */
+-      if (io->flags & UBLK_IO_FLAG_ABORTED) {
+-              res = BLK_STS_IOERR;
+-              goto exit;
+-      }
+-
+       /* failed read IO if nothing is read */
+       if (!io->res && req_op(req) == REQ_OP_READ)
+               io->res = -EIO;
+@@ -1109,47 +1094,6 @@ static void ublk_complete_rq(struct kref
+       __ublk_complete_rq(req);
+ }
+-static void ublk_do_fail_rq(struct request *req)
+-{
+-      struct ublk_queue *ubq = req->mq_hctx->driver_data;
+-
+-      if (ublk_nosrv_should_reissue_outstanding(ubq->dev))
+-              blk_mq_requeue_request(req, false);
+-      else
+-              __ublk_complete_rq(req);
+-}
+-
+-static void ublk_fail_rq_fn(struct kref *ref)
+-{
+-      struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data,
+-                      ref);
+-      struct request *req = blk_mq_rq_from_pdu(data);
+-
+-      ublk_do_fail_rq(req);
+-}
+-
+-/*
+- * Since ublk_rq_task_work_cb always fails requests immediately during
+- * exiting, __ublk_fail_req() is only called from abort context during
+- * exiting. So lock is unnecessary.
+- *
+- * Also aborting may not be started yet, keep in mind that one failed
+- * request may be issued by block layer again.
+- */
+-static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
+-              struct request *req)
+-{
+-      WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);
+-
+-      if (ublk_need_req_ref(ubq)) {
+-              struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+-
+-              kref_put(&data->ref, ublk_fail_rq_fn);
+-      } else {
+-              ublk_do_fail_rq(req);
+-      }
+-}
+-
+ static void ubq_complete_io_cmd(struct ublk_io *io, int res,
+                               unsigned issue_flags)
+ {
+@@ -1639,10 +1583,26 @@ static void ublk_commit_completion(struc
+               ublk_put_req_ref(ubq, req);
+ }
++static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
++              struct request *req)
++{
++      WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);
++
++      if (ublk_nosrv_should_reissue_outstanding(ubq->dev))
++              blk_mq_requeue_request(req, false);
++      else {
++              io->res = -EIO;
++              __ublk_complete_rq(req);
++      }
++}
++
+ /*
+- * Called from ubq_daemon context via cancel fn, meantime quiesce ublk
+- * blk-mq queue, so we are called exclusively with blk-mq and ubq_daemon
+- * context, so everything is serialized.
++ * Called from ublk char device release handler, when any uring_cmd is
++ * done, meantime request queue is "quiesced" since all inflight requests
++ * can't be completed because ublk server is dead.
++ *
++ * So no one can hold our request IO reference any more, simply ignore the
++ * reference, and complete the request immediately
+  */
+ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
+ {
+@@ -1659,10 +1619,8 @@ static void ublk_abort_queue(struct ublk
+                        * will do it
+                        */
+                       rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
+-                      if (rq && blk_mq_request_started(rq)) {
+-                              io->flags |= UBLK_IO_FLAG_ABORTED;
++                      if (rq && blk_mq_request_started(rq))
+                               __ublk_fail_req(ubq, io, rq);
+-                      }
+               }
+       }
+ }