]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
nvme-rdma: fix potential unbalanced freeze & unfreeze
authorMing Lei <ming.lei@redhat.com>
Tue, 11 Jul 2023 09:40:41 +0000 (17:40 +0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 16 Aug 2023 16:21:02 +0000 (18:21 +0200)
commit 29b434d1e49252b3ad56ad3197e47fafff5356a1 upstream.

Move start_freeze into nvme_rdma_configure_io_queues(), and there is
at least two benefits:

1) fix unbalanced freeze and unfreeze, since re-connection work may
fail or be broken by removal

2) IO during error recovery can be failfast quickly because nvme fabrics
unquiesces queues after teardown.

One side-effect is that !mpath request may timeout during connecting
because of queue topo change, but that looks not one big deal:

1) same problem exists with current code base

2) compared with !mpath, mpath use case is dominant

Fixes: 9f98772ba307 ("nvme-rdma: fix controller reset hang during traffic")
Cc: stable@vger.kernel.org
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/nvme/host/rdma.c

index b61924394032ac39b487fddcbc562c110c3bb139..825c961c6fd50cd3091294cb68aecc2fe8bd7b6d 100644 (file)
@@ -989,6 +989,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
                goto out_cleanup_connect_q;
 
        if (!new) {
+               nvme_start_freeze(&ctrl->ctrl);
                nvme_start_queues(&ctrl->ctrl);
                if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) {
                        /*
@@ -997,6 +998,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
                         * to be safe.
                         */
                        ret = -ENODEV;
+                       nvme_unfreeze(&ctrl->ctrl);
                        goto out_wait_freeze_timed_out;
                }
                blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset,
@@ -1042,7 +1044,6 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
                bool remove)
 {
        if (ctrl->ctrl.queue_count > 1) {
-               nvme_start_freeze(&ctrl->ctrl);
                nvme_stop_queues(&ctrl->ctrl);
                nvme_sync_io_queues(&ctrl->ctrl);
                nvme_rdma_stop_io_queues(ctrl);