From 0e125c7a35b3a6635dba74f9ed7d0f2a51bf0d65 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 29 Jun 2020 20:01:05 -0400 Subject: [PATCH] Drop NVMe multipath patches for now They have a yet to be merged fix and all will get re-added once the fix lands upstream. Signed-off-by: Sasha Levin --- ...ossible-deadlock-when-i-o-is-blocked.patch | 124 ---------------- ...ix-deadlock-between-ana_work-and-sca.patch | 134 ------------------ ...-multipath-set-bdi-capabilities-once.patch | 51 ------- queue-4.19/series | 3 - ...ossible-deadlock-when-i-o-is-blocked.patch | 124 ---------------- ...ix-deadlock-between-ana_work-and-sca.patch | 134 ------------------ ...tipath-fix-deadlock-due-to-head-lock.patch | 124 ---------------- ...-multipath-set-bdi-capabilities-once.patch | 51 ------- queue-5.4/series | 4 - ...ossible-deadlock-when-i-o-is-blocked.patch | 124 ---------------- ...ix-deadlock-between-ana_work-and-sca.patch | 134 ------------------ ...tipath-fix-deadlock-due-to-head-lock.patch | 124 ---------------- ...-multipath-set-bdi-capabilities-once.patch | 70 --------- queue-5.7/series | 4 - 14 files changed, 1205 deletions(-) delete mode 100644 queue-4.19/nvme-fix-possible-deadlock-when-i-o-is-blocked.patch delete mode 100644 queue-4.19/nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch delete mode 100644 queue-4.19/nvme-multipath-set-bdi-capabilities-once.patch delete mode 100644 queue-5.4/nvme-fix-possible-deadlock-when-i-o-is-blocked.patch delete mode 100644 queue-5.4/nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch delete mode 100644 queue-5.4/nvme-multipath-fix-deadlock-due-to-head-lock.patch delete mode 100644 queue-5.4/nvme-multipath-set-bdi-capabilities-once.patch delete mode 100644 queue-5.7/nvme-fix-possible-deadlock-when-i-o-is-blocked.patch delete mode 100644 queue-5.7/nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch delete mode 100644 queue-5.7/nvme-multipath-fix-deadlock-due-to-head-lock.patch delete mode 100644 queue-5.7/nvme-multipath-set-bdi-capabilities-once.patch diff --git a/queue-4.19/nvme-fix-possible-deadlock-when-i-o-is-blocked.patch b/queue-4.19/nvme-fix-possible-deadlock-when-i-o-is-blocked.patch deleted file mode 100644 index c2aa3fbea68..00000000000 --- a/queue-4.19/nvme-fix-possible-deadlock-when-i-o-is-blocked.patch +++ /dev/null @@ -1,124 +0,0 @@ -From afc6f75bcc7b635708e953a7cdc768d77f85f572 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Wed, 24 Jun 2020 01:53:08 -0700 -Subject: nvme: fix possible deadlock when I/O is blocked - -From: Sagi Grimberg - -[ Upstream commit 3b4b19721ec652ad2c4fe51dfbe5124212b5f581 ] - -Revert fab7772bfbcf ("nvme-multipath: revalidate nvme_ns_head gendisk -in nvme_validate_ns") - -When adding a new namespace to the head disk (via nvme_mpath_set_live) -we will see partition scan which triggers I/O on the mpath device node. -This process will usually be triggered from the scan_work which holds -the scan_lock. If I/O blocks (if we got ana change currently have only -available paths but none are accessible) this can deadlock on the head -disk bd_mutex as both partition scan I/O takes it, and head disk revalidation -takes it to check for resize (also triggered from scan_work on a different -path). See trace [1]. - -The mpath disk revalidation was originally added to detect online disk -size change, but this is no longer needed since commit cb224c3af4df -("nvme: Convert to use set_capacity_revalidate_and_notify") which already -updates resize info without unnecessarily revalidating the disk (the -mpath disk doesn't even implement .revalidate_disk fop). - -[1]: --- -kernel: INFO: task kworker/u65:9:494 blocked for more than 241 seconds. -kernel: Tainted: G OE 5.3.5-050305-generic #201910071830 -kernel: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. -kernel: kworker/u65:9 D 0 494 2 0x80004000 -kernel: Workqueue: nvme-wq nvme_scan_work [nvme_core] -kernel: Call Trace: -kernel: __schedule+0x2b9/0x6c0 -kernel: schedule+0x42/0xb0 -kernel: schedule_preempt_disabled+0xe/0x10 -kernel: __mutex_lock.isra.0+0x182/0x4f0 -kernel: __mutex_lock_slowpath+0x13/0x20 -kernel: mutex_lock+0x2e/0x40 -kernel: revalidate_disk+0x63/0xa0 -kernel: __nvme_revalidate_disk+0xfe/0x110 [nvme_core] -kernel: nvme_revalidate_disk+0xa4/0x160 [nvme_core] -kernel: ? evict+0x14c/0x1b0 -kernel: revalidate_disk+0x2b/0xa0 -kernel: nvme_validate_ns+0x49/0x940 [nvme_core] -kernel: ? blk_mq_free_request+0xd2/0x100 -kernel: ? __nvme_submit_sync_cmd+0xbe/0x1e0 [nvme_core] -kernel: nvme_scan_work+0x24f/0x380 [nvme_core] -kernel: process_one_work+0x1db/0x380 -kernel: worker_thread+0x249/0x400 -kernel: kthread+0x104/0x140 -kernel: ? process_one_work+0x380/0x380 -kernel: ? kthread_park+0x80/0x80 -kernel: ret_from_fork+0x1f/0x40 -... -kernel: INFO: task kworker/u65:1:2630 blocked for more than 241 seconds. -kernel: Tainted: G OE 5.3.5-050305-generic #201910071830 -kernel: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. -kernel: kworker/u65:1 D 0 2630 2 0x80004000 -kernel: Workqueue: nvme-wq nvme_scan_work [nvme_core] -kernel: Call Trace: -kernel: __schedule+0x2b9/0x6c0 -kernel: schedule+0x42/0xb0 -kernel: io_schedule+0x16/0x40 -kernel: do_read_cache_page+0x438/0x830 -kernel: ? __switch_to_asm+0x34/0x70 -kernel: ? file_fdatawait_range+0x30/0x30 -kernel: read_cache_page+0x12/0x20 -kernel: read_dev_sector+0x27/0xc0 -kernel: read_lba+0xc1/0x220 -kernel: ? kmem_cache_alloc_trace+0x19c/0x230 -kernel: efi_partition+0x1e6/0x708 -kernel: ? vsnprintf+0x39e/0x4e0 -kernel: ? snprintf+0x49/0x60 -kernel: check_partition+0x154/0x244 -kernel: rescan_partitions+0xae/0x280 -kernel: __blkdev_get+0x40f/0x560 -kernel: blkdev_get+0x3d/0x140 -kernel: __device_add_disk+0x388/0x480 -kernel: device_add_disk+0x13/0x20 -kernel: nvme_mpath_set_live+0x119/0x140 [nvme_core] -kernel: nvme_update_ns_ana_state+0x5c/0x60 [nvme_core] -kernel: nvme_set_ns_ana_state+0x1e/0x30 [nvme_core] -kernel: nvme_parse_ana_log+0xa1/0x180 [nvme_core] -kernel: ? nvme_update_ns_ana_state+0x60/0x60 [nvme_core] -kernel: nvme_mpath_add_disk+0x47/0x90 [nvme_core] -kernel: nvme_validate_ns+0x396/0x940 [nvme_core] -kernel: ? blk_mq_free_request+0xd2/0x100 -kernel: nvme_scan_work+0x24f/0x380 [nvme_core] -kernel: process_one_work+0x1db/0x380 -kernel: worker_thread+0x249/0x400 -kernel: kthread+0x104/0x140 -kernel: ? process_one_work+0x380/0x380 -kernel: ? kthread_park+0x80/0x80 -kernel: ret_from_fork+0x1f/0x40 --- - -Fixes: fab7772bfbcf ("nvme-multipath: revalidate nvme_ns_head gendisk -in nvme_validate_ns") -Signed-off-by: Anton Eidelman -Signed-off-by: Sagi Grimberg -Signed-off-by: Christoph Hellwig -Signed-off-by: Sasha Levin ---- - drivers/nvme/host/core.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c -index 0d60f2f8f3eec..5c9326777334f 100644 ---- a/drivers/nvme/host/core.c -+++ b/drivers/nvme/host/core.c -@@ -1602,7 +1602,6 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) - if (ns->head->disk) { - nvme_update_disk_info(ns->head->disk, ns, id); - blk_queue_stack_limits(ns->head->disk->queue, ns->queue); -- revalidate_disk(ns->head->disk); - } - #endif - } --- -2.25.1 - diff --git a/queue-4.19/nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch b/queue-4.19/nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch deleted file mode 100644 index 42818c7e8c8..00000000000 --- a/queue-4.19/nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 90ab3045208e61be9f8e0342975c68bcbf57ecf4 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Wed, 24 Jun 2020 01:53:09 -0700 -Subject: nvme-multipath: fix deadlock between ana_work and scan_work - -From: Anton Eidelman - -[ Upstream commit 489dd102a2c7c94d783a35f9412eb085b8da1aa4 ] - -When scan_work calls nvme_mpath_add_disk() this holds ana_lock -and invokes nvme_parse_ana_log(), which may issue IO -in device_add_disk() and hang waiting for an accessible path. -While nvme_mpath_set_live() only called when nvme_state_is_live(), -a transition may cause NVME_SC_ANA_TRANSITION and requeue the IO. - -In order to recover and complete the IO ana_work on the same ctrl -should be able to update the path state and remove NVME_NS_ANA_PENDING. - -The deadlock occurs because scan_work keeps holding ana_lock, -so ana_work hangs [1]. - -Fix: -Now nvme_mpath_add_disk() uses nvme_parse_ana_log() to obtain a copy -of the ANA group desc, and then calls nvme_update_ns_ana_state() without -holding ana_lock. - -[1]: -kernel: Workqueue: nvme-wq nvme_scan_work [nvme_core] -kernel: Call Trace: -kernel: __schedule+0x2b9/0x6c0 -kernel: schedule+0x42/0xb0 -kernel: io_schedule+0x16/0x40 -kernel: do_read_cache_page+0x438/0x830 -kernel: read_cache_page+0x12/0x20 -kernel: read_dev_sector+0x27/0xc0 -kernel: read_lba+0xc1/0x220 -kernel: efi_partition+0x1e6/0x708 -kernel: check_partition+0x154/0x244 -kernel: rescan_partitions+0xae/0x280 -kernel: __blkdev_get+0x40f/0x560 -kernel: blkdev_get+0x3d/0x140 -kernel: __device_add_disk+0x388/0x480 -kernel: device_add_disk+0x13/0x20 -kernel: nvme_mpath_set_live+0x119/0x140 [nvme_core] -kernel: nvme_update_ns_ana_state+0x5c/0x60 [nvme_core] -kernel: nvme_set_ns_ana_state+0x1e/0x30 [nvme_core] -kernel: nvme_parse_ana_log+0xa1/0x180 [nvme_core] -kernel: nvme_mpath_add_disk+0x47/0x90 [nvme_core] -kernel: nvme_validate_ns+0x396/0x940 [nvme_core] -kernel: nvme_scan_work+0x24f/0x380 [nvme_core] -kernel: process_one_work+0x1db/0x380 -kernel: worker_thread+0x249/0x400 -kernel: kthread+0x104/0x140 - -kernel: Workqueue: nvme-wq nvme_ana_work [nvme_core] -kernel: Call Trace: -kernel: __schedule+0x2b9/0x6c0 -kernel: schedule+0x42/0xb0 -kernel: schedule_preempt_disabled+0xe/0x10 -kernel: __mutex_lock.isra.0+0x182/0x4f0 -kernel: ? __switch_to_asm+0x34/0x70 -kernel: ? select_task_rq_fair+0x1aa/0x5c0 -kernel: ? kvm_sched_clock_read+0x11/0x20 -kernel: ? sched_clock+0x9/0x10 -kernel: __mutex_lock_slowpath+0x13/0x20 -kernel: mutex_lock+0x2e/0x40 -kernel: nvme_read_ana_log+0x3a/0x100 [nvme_core] -kernel: nvme_ana_work+0x15/0x20 [nvme_core] -kernel: process_one_work+0x1db/0x380 -kernel: worker_thread+0x4d/0x400 -kernel: kthread+0x104/0x140 -kernel: ? process_one_work+0x380/0x380 -kernel: ? kthread_park+0x80/0x80 -kernel: ret_from_fork+0x35/0x40 - -Fixes: 0d0b660f214d ("nvme: add ANA support") -Signed-off-by: Anton Eidelman -Signed-off-by: Sagi Grimberg -Signed-off-by: Christoph Hellwig -Signed-off-by: Sasha Levin ---- - drivers/nvme/host/multipath.c | 24 ++++++++++++++++-------- - 1 file changed, 16 insertions(+), 8 deletions(-) - -diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c -index 6f584a9515f42..3ad6183c5e6b4 100644 ---- a/drivers/nvme/host/multipath.c -+++ b/drivers/nvme/host/multipath.c -@@ -496,26 +496,34 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr, - } - DEVICE_ATTR_RO(ana_state); - --static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl, -+static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl, - struct nvme_ana_group_desc *desc, void *data) - { -- struct nvme_ns *ns = data; -+ struct nvme_ana_group_desc *dst = data; - -- if (ns->ana_grpid == le32_to_cpu(desc->grpid)) { -- nvme_update_ns_ana_state(desc, ns); -- return -ENXIO; /* just break out of the loop */ -- } -+ if (desc->grpid != dst->grpid) -+ return 0; - -- return 0; -+ *dst = *desc; -+ return -ENXIO; /* just break out of the loop */ - } - - void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) - { - if (nvme_ctrl_use_ana(ns->ctrl)) { -+ struct nvme_ana_group_desc desc = { -+ .grpid = id->anagrpid, -+ .state = 0, -+ }; -+ - mutex_lock(&ns->ctrl->ana_lock); - ns->ana_grpid = le32_to_cpu(id->anagrpid); -- nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state); -+ nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc); - mutex_unlock(&ns->ctrl->ana_lock); -+ if (desc.state) { -+ /* found the group desc: update */ -+ nvme_update_ns_ana_state(&desc, ns); -+ } - } else { - mutex_lock(&ns->head->lock); - ns->ana_state = NVME_ANA_OPTIMIZED; --- -2.25.1 - diff --git a/queue-4.19/nvme-multipath-set-bdi-capabilities-once.patch b/queue-4.19/nvme-multipath-set-bdi-capabilities-once.patch deleted file mode 100644 index bdf0cf79932..00000000000 --- a/queue-4.19/nvme-multipath-set-bdi-capabilities-once.patch +++ /dev/null @@ -1,51 +0,0 @@ -From d18a99a506620a80566806c5d65a3fd7ee875792 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 9 Apr 2020 09:09:04 -0700 -Subject: nvme-multipath: set bdi capabilities once - -From: Keith Busch - -[ Upstream commit b2ce4d90690bd29ce5b554e203cd03682dd59697 ] - -The queues' backing device info capabilities don't change with each -namespace revalidation. Set it only when each path's request_queue -is initially added to a multipath queue. - -Signed-off-by: Keith Busch -Reviewed-by: Sagi Grimberg -Signed-off-by: Christoph Hellwig -Signed-off-by: Jens Axboe -Signed-off-by: Sasha Levin ---- - drivers/nvme/host/multipath.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c -index 588864beabd80..6f584a9515f42 100644 ---- a/drivers/nvme/host/multipath.c -+++ b/drivers/nvme/host/multipath.c -@@ -11,6 +11,7 @@ - * more details. - */ - -+#include - #include - #include - #include "nvme.h" -@@ -521,6 +522,13 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) - nvme_mpath_set_live(ns); - mutex_unlock(&ns->head->lock); - } -+ -+ if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { -+ struct backing_dev_info *info = -+ ns->head->disk->queue->backing_dev_info; -+ -+ info->capabilities |= BDI_CAP_STABLE_WRITES; -+ } - } - - void nvme_mpath_remove_disk(struct nvme_ns_head *head) --- -2.25.1 - diff --git a/queue-4.19/series b/queue-4.19/series index 7afe836d37a..8a4cb67da9e 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -78,9 +78,6 @@ net-qed-fix-excessive-qm-ilt-lines-consumption.patch cxgb4-move-handling-l2t-arp-failures-to-caller.patch arm-imx5-add-missing-put_device-call-in-imx_suspend_.patch usb-gadget-udc-potential-oops-in-error-handling-code.patch -nvme-multipath-set-bdi-capabilities-once.patch -nvme-fix-possible-deadlock-when-i-o-is-blocked.patch -nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch netfilter-ipset-fix-unaligned-atomic-access.patch net-bcmgenet-use-hardware-padding-of-runt-frames.patch i2c-fsi-fix-the-port-number-field-in-status-register.patch diff --git a/queue-5.4/nvme-fix-possible-deadlock-when-i-o-is-blocked.patch b/queue-5.4/nvme-fix-possible-deadlock-when-i-o-is-blocked.patch deleted file mode 100644 index 34b2b1cd0c0..00000000000 --- a/queue-5.4/nvme-fix-possible-deadlock-when-i-o-is-blocked.patch +++ /dev/null @@ -1,124 +0,0 @@ -From ae2e1a951903da948ef80bad65edd4af30c1eac8 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Wed, 24 Jun 2020 01:53:08 -0700 -Subject: nvme: fix possible deadlock when I/O is blocked - -From: Sagi Grimberg - -[ Upstream commit 3b4b19721ec652ad2c4fe51dfbe5124212b5f581 ] - -Revert fab7772bfbcf ("nvme-multipath: revalidate nvme_ns_head gendisk -in nvme_validate_ns") - -When adding a new namespace to the head disk (via nvme_mpath_set_live) -we will see partition scan which triggers I/O on the mpath device node. -This process will usually be triggered from the scan_work which holds -the scan_lock. If I/O blocks (if we got ana change currently have only -available paths but none are accessible) this can deadlock on the head -disk bd_mutex as both partition scan I/O takes it, and head disk revalidation -takes it to check for resize (also triggered from scan_work on a different -path). See trace [1]. - -The mpath disk revalidation was originally added to detect online disk -size change, but this is no longer needed since commit cb224c3af4df -("nvme: Convert to use set_capacity_revalidate_and_notify") which already -updates resize info without unnecessarily revalidating the disk (the -mpath disk doesn't even implement .revalidate_disk fop). - -[1]: --- -kernel: INFO: task kworker/u65:9:494 blocked for more than 241 seconds. -kernel: Tainted: G OE 5.3.5-050305-generic #201910071830 -kernel: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. -kernel: kworker/u65:9 D 0 494 2 0x80004000 -kernel: Workqueue: nvme-wq nvme_scan_work [nvme_core] -kernel: Call Trace: -kernel: __schedule+0x2b9/0x6c0 -kernel: schedule+0x42/0xb0 -kernel: schedule_preempt_disabled+0xe/0x10 -kernel: __mutex_lock.isra.0+0x182/0x4f0 -kernel: __mutex_lock_slowpath+0x13/0x20 -kernel: mutex_lock+0x2e/0x40 -kernel: revalidate_disk+0x63/0xa0 -kernel: __nvme_revalidate_disk+0xfe/0x110 [nvme_core] -kernel: nvme_revalidate_disk+0xa4/0x160 [nvme_core] -kernel: ? evict+0x14c/0x1b0 -kernel: revalidate_disk+0x2b/0xa0 -kernel: nvme_validate_ns+0x49/0x940 [nvme_core] -kernel: ? blk_mq_free_request+0xd2/0x100 -kernel: ? __nvme_submit_sync_cmd+0xbe/0x1e0 [nvme_core] -kernel: nvme_scan_work+0x24f/0x380 [nvme_core] -kernel: process_one_work+0x1db/0x380 -kernel: worker_thread+0x249/0x400 -kernel: kthread+0x104/0x140 -kernel: ? process_one_work+0x380/0x380 -kernel: ? kthread_park+0x80/0x80 -kernel: ret_from_fork+0x1f/0x40 -... -kernel: INFO: task kworker/u65:1:2630 blocked for more than 241 seconds. -kernel: Tainted: G OE 5.3.5-050305-generic #201910071830 -kernel: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. -kernel: kworker/u65:1 D 0 2630 2 0x80004000 -kernel: Workqueue: nvme-wq nvme_scan_work [nvme_core] -kernel: Call Trace: -kernel: __schedule+0x2b9/0x6c0 -kernel: schedule+0x42/0xb0 -kernel: io_schedule+0x16/0x40 -kernel: do_read_cache_page+0x438/0x830 -kernel: ? __switch_to_asm+0x34/0x70 -kernel: ? file_fdatawait_range+0x30/0x30 -kernel: read_cache_page+0x12/0x20 -kernel: read_dev_sector+0x27/0xc0 -kernel: read_lba+0xc1/0x220 -kernel: ? kmem_cache_alloc_trace+0x19c/0x230 -kernel: efi_partition+0x1e6/0x708 -kernel: ? vsnprintf+0x39e/0x4e0 -kernel: ? snprintf+0x49/0x60 -kernel: check_partition+0x154/0x244 -kernel: rescan_partitions+0xae/0x280 -kernel: __blkdev_get+0x40f/0x560 -kernel: blkdev_get+0x3d/0x140 -kernel: __device_add_disk+0x388/0x480 -kernel: device_add_disk+0x13/0x20 -kernel: nvme_mpath_set_live+0x119/0x140 [nvme_core] -kernel: nvme_update_ns_ana_state+0x5c/0x60 [nvme_core] -kernel: nvme_set_ns_ana_state+0x1e/0x30 [nvme_core] -kernel: nvme_parse_ana_log+0xa1/0x180 [nvme_core] -kernel: ? nvme_update_ns_ana_state+0x60/0x60 [nvme_core] -kernel: nvme_mpath_add_disk+0x47/0x90 [nvme_core] -kernel: nvme_validate_ns+0x396/0x940 [nvme_core] -kernel: ? blk_mq_free_request+0xd2/0x100 -kernel: nvme_scan_work+0x24f/0x380 [nvme_core] -kernel: process_one_work+0x1db/0x380 -kernel: worker_thread+0x249/0x400 -kernel: kthread+0x104/0x140 -kernel: ? process_one_work+0x380/0x380 -kernel: ? kthread_park+0x80/0x80 -kernel: ret_from_fork+0x1f/0x40 --- - -Fixes: fab7772bfbcf ("nvme-multipath: revalidate nvme_ns_head gendisk -in nvme_validate_ns") -Signed-off-by: Anton Eidelman -Signed-off-by: Sagi Grimberg -Signed-off-by: Christoph Hellwig -Signed-off-by: Sasha Levin ---- - drivers/nvme/host/core.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c -index d4b388793f40d..c44c00b9e1d85 100644 ---- a/drivers/nvme/host/core.c -+++ b/drivers/nvme/host/core.c -@@ -1870,7 +1870,6 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) - if (ns->head->disk) { - nvme_update_disk_info(ns->head->disk, ns, id); - blk_queue_stack_limits(ns->head->disk->queue, ns->queue); -- revalidate_disk(ns->head->disk); - } - #endif - } --- -2.25.1 - diff --git a/queue-5.4/nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch b/queue-5.4/nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch deleted file mode 100644 index 761029e908a..00000000000 --- a/queue-5.4/nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch +++ /dev/null @@ -1,134 +0,0 @@ -From f954b14a2e48ad296fe233dcc6f04c824aef6fda Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Wed, 24 Jun 2020 01:53:09 -0700 -Subject: nvme-multipath: fix deadlock between ana_work and scan_work - -From: Anton Eidelman - -[ Upstream commit 489dd102a2c7c94d783a35f9412eb085b8da1aa4 ] - -When scan_work calls nvme_mpath_add_disk() this holds ana_lock -and invokes nvme_parse_ana_log(), which may issue IO -in device_add_disk() and hang waiting for an accessible path. -While nvme_mpath_set_live() only called when nvme_state_is_live(), -a transition may cause NVME_SC_ANA_TRANSITION and requeue the IO. - -In order to recover and complete the IO ana_work on the same ctrl -should be able to update the path state and remove NVME_NS_ANA_PENDING. - -The deadlock occurs because scan_work keeps holding ana_lock, -so ana_work hangs [1]. - -Fix: -Now nvme_mpath_add_disk() uses nvme_parse_ana_log() to obtain a copy -of the ANA group desc, and then calls nvme_update_ns_ana_state() without -holding ana_lock. - -[1]: -kernel: Workqueue: nvme-wq nvme_scan_work [nvme_core] -kernel: Call Trace: -kernel: __schedule+0x2b9/0x6c0 -kernel: schedule+0x42/0xb0 -kernel: io_schedule+0x16/0x40 -kernel: do_read_cache_page+0x438/0x830 -kernel: read_cache_page+0x12/0x20 -kernel: read_dev_sector+0x27/0xc0 -kernel: read_lba+0xc1/0x220 -kernel: efi_partition+0x1e6/0x708 -kernel: check_partition+0x154/0x244 -kernel: rescan_partitions+0xae/0x280 -kernel: __blkdev_get+0x40f/0x560 -kernel: blkdev_get+0x3d/0x140 -kernel: __device_add_disk+0x388/0x480 -kernel: device_add_disk+0x13/0x20 -kernel: nvme_mpath_set_live+0x119/0x140 [nvme_core] -kernel: nvme_update_ns_ana_state+0x5c/0x60 [nvme_core] -kernel: nvme_set_ns_ana_state+0x1e/0x30 [nvme_core] -kernel: nvme_parse_ana_log+0xa1/0x180 [nvme_core] -kernel: nvme_mpath_add_disk+0x47/0x90 [nvme_core] -kernel: nvme_validate_ns+0x396/0x940 [nvme_core] -kernel: nvme_scan_work+0x24f/0x380 [nvme_core] -kernel: process_one_work+0x1db/0x380 -kernel: worker_thread+0x249/0x400 -kernel: kthread+0x104/0x140 - -kernel: Workqueue: nvme-wq nvme_ana_work [nvme_core] -kernel: Call Trace: -kernel: __schedule+0x2b9/0x6c0 -kernel: schedule+0x42/0xb0 -kernel: schedule_preempt_disabled+0xe/0x10 -kernel: __mutex_lock.isra.0+0x182/0x4f0 -kernel: ? __switch_to_asm+0x34/0x70 -kernel: ? select_task_rq_fair+0x1aa/0x5c0 -kernel: ? kvm_sched_clock_read+0x11/0x20 -kernel: ? sched_clock+0x9/0x10 -kernel: __mutex_lock_slowpath+0x13/0x20 -kernel: mutex_lock+0x2e/0x40 -kernel: nvme_read_ana_log+0x3a/0x100 [nvme_core] -kernel: nvme_ana_work+0x15/0x20 [nvme_core] -kernel: process_one_work+0x1db/0x380 -kernel: worker_thread+0x4d/0x400 -kernel: kthread+0x104/0x140 -kernel: ? process_one_work+0x380/0x380 -kernel: ? kthread_park+0x80/0x80 -kernel: ret_from_fork+0x35/0x40 - -Fixes: 0d0b660f214d ("nvme: add ANA support") -Signed-off-by: Anton Eidelman -Signed-off-by: Sagi Grimberg -Signed-off-by: Christoph Hellwig -Signed-off-by: Sasha Levin ---- - drivers/nvme/host/multipath.c | 24 ++++++++++++++++-------- - 1 file changed, 16 insertions(+), 8 deletions(-) - -diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c -index c17cf8f00f536..0f08c15553a64 100644 ---- a/drivers/nvme/host/multipath.c -+++ b/drivers/nvme/host/multipath.c -@@ -641,26 +641,34 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr, - } - DEVICE_ATTR_RO(ana_state); - --static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl, -+static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl, - struct nvme_ana_group_desc *desc, void *data) - { -- struct nvme_ns *ns = data; -+ struct nvme_ana_group_desc *dst = data; - -- if (ns->ana_grpid == le32_to_cpu(desc->grpid)) { -- nvme_update_ns_ana_state(desc, ns); -- return -ENXIO; /* just break out of the loop */ -- } -+ if (desc->grpid != dst->grpid) -+ return 0; - -- return 0; -+ *dst = *desc; -+ return -ENXIO; /* just break out of the loop */ - } - - void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) - { - if (nvme_ctrl_use_ana(ns->ctrl)) { -+ struct nvme_ana_group_desc desc = { -+ .grpid = id->anagrpid, -+ .state = 0, -+ }; -+ - mutex_lock(&ns->ctrl->ana_lock); - ns->ana_grpid = le32_to_cpu(id->anagrpid); -- nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state); -+ nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc); - mutex_unlock(&ns->ctrl->ana_lock); -+ if (desc.state) { -+ /* found the group desc: update */ -+ nvme_update_ns_ana_state(&desc, ns); -+ } - } else { - mutex_lock(&ns->head->lock); - ns->ana_state = NVME_ANA_OPTIMIZED; --- -2.25.1 - diff --git a/queue-5.4/nvme-multipath-fix-deadlock-due-to-head-lock.patch b/queue-5.4/nvme-multipath-fix-deadlock-due-to-head-lock.patch deleted file mode 100644 index 2f5b155564f..00000000000 --- a/queue-5.4/nvme-multipath-fix-deadlock-due-to-head-lock.patch +++ /dev/null @@ -1,124 +0,0 @@ -From 56d18bff5ef7a8498c7028ad568c34078611296e Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Wed, 24 Jun 2020 01:53:11 -0700 -Subject: nvme-multipath: fix deadlock due to head->lock - -From: Anton Eidelman - -[ Upstream commit d8a22f85609fadb46ba699e0136cc3ebdeebff79 ] - -In the following scenario scan_work and ana_work will deadlock: - -When scan_work calls nvme_mpath_add_disk() this holds ana_lock -and invokes nvme_parse_ana_log(), which may issue IO -in device_add_disk() and hang waiting for an accessible path. - -While nvme_mpath_set_live() only called when nvme_state_is_live(), -a transition may cause NVME_SC_ANA_TRANSITION and requeue the IO. - -Since nvme_mpath_set_live() holds ns->head->lock, an ana_work on -ANY ctrl will not be able to complete nvme_mpath_set_live() -on the same ns->head, which is required in order to update -the new accessible path and remove NVME_NS_ANA_PENDING.. -Therefore IO never completes: deadlock [1]. - -Fix: -Move device_add_disk out of the head->lock and protect it with an -atomic test_and_set for a new NVME_NS_HEAD_HAS_DISK bit. - -[1]: -kernel: INFO: task kworker/u8:2:160 blocked for more than 120 seconds. -kernel: Tainted: G OE 5.3.5-050305-generic #201910071830 -kernel: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. -kernel: kworker/u8:2 D 0 160 2 0x80004000 -kernel: Workqueue: nvme-wq nvme_ana_work [nvme_core] -kernel: Call Trace: -kernel: __schedule+0x2b9/0x6c0 -kernel: schedule+0x42/0xb0 -kernel: schedule_preempt_disabled+0xe/0x10 -kernel: __mutex_lock.isra.0+0x182/0x4f0 -kernel: __mutex_lock_slowpath+0x13/0x20 -kernel: mutex_lock+0x2e/0x40 -kernel: nvme_update_ns_ana_state+0x22/0x60 [nvme_core] -kernel: nvme_update_ana_state+0xca/0xe0 [nvme_core] -kernel: nvme_parse_ana_log+0xa1/0x180 [nvme_core] -kernel: nvme_read_ana_log+0x76/0x100 [nvme_core] -kernel: nvme_ana_work+0x15/0x20 [nvme_core] -kernel: process_one_work+0x1db/0x380 -kernel: worker_thread+0x4d/0x400 -kernel: kthread+0x104/0x140 -kernel: ret_from_fork+0x35/0x40 -kernel: INFO: task kworker/u8:4:439 blocked for more than 120 seconds. -kernel: Tainted: G OE 5.3.5-050305-generic #201910071830 -kernel: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. -kernel: kworker/u8:4 D 0 439 2 0x80004000 -kernel: Workqueue: nvme-wq nvme_scan_work [nvme_core] -kernel: Call Trace: -kernel: __schedule+0x2b9/0x6c0 -kernel: schedule+0x42/0xb0 -kernel: io_schedule+0x16/0x40 -kernel: do_read_cache_page+0x438/0x830 -kernel: read_cache_page+0x12/0x20 -kernel: read_dev_sector+0x27/0xc0 -kernel: read_lba+0xc1/0x220 -kernel: efi_partition+0x1e6/0x708 -kernel: check_partition+0x154/0x244 -kernel: rescan_partitions+0xae/0x280 -kernel: __blkdev_get+0x40f/0x560 -kernel: blkdev_get+0x3d/0x140 -kernel: __device_add_disk+0x388/0x480 -kernel: device_add_disk+0x13/0x20 -kernel: nvme_mpath_set_live+0x119/0x140 [nvme_core] -kernel: nvme_update_ns_ana_state+0x5c/0x60 [nvme_core] -kernel: nvme_mpath_add_disk+0xbe/0x100 [nvme_core] -kernel: nvme_validate_ns+0x396/0x940 [nvme_core] -kernel: nvme_scan_work+0x256/0x390 [nvme_core] -kernel: process_one_work+0x1db/0x380 -kernel: worker_thread+0x4d/0x400 -kernel: kthread+0x104/0x140 -kernel: ret_from_fork+0x35/0x40 - -Fixes: 0d0b660f214d ("nvme: add ANA support") -Signed-off-by: Anton Eidelman -Signed-off-by: Sagi Grimberg -Signed-off-by: Christoph Hellwig -Signed-off-by: Sasha Levin ---- - drivers/nvme/host/multipath.c | 4 ++-- - drivers/nvme/host/nvme.h | 2 ++ - 2 files changed, 4 insertions(+), 2 deletions(-) - -diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c -index 18f0a05c74b56..574b52e911f08 100644 ---- a/drivers/nvme/host/multipath.c -+++ b/drivers/nvme/host/multipath.c -@@ -417,11 +417,11 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) - if (!head->disk) - return; - -- mutex_lock(&head->lock); -- if (!(head->disk->flags & GENHD_FL_UP)) -+ if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) - device_add_disk(&head->subsys->dev, head->disk, - nvme_ns_id_attr_groups); - -+ mutex_lock(&head->lock); - if (nvme_path_is_optimized(ns)) { - int node, srcu_idx; - -diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h -index 22e8401352c22..ed02260862cb5 100644 ---- a/drivers/nvme/host/nvme.h -+++ b/drivers/nvme/host/nvme.h -@@ -345,6 +345,8 @@ struct nvme_ns_head { - spinlock_t requeue_lock; - struct work_struct requeue_work; - struct mutex lock; -+ unsigned long flags; -+#define NVME_NSHEAD_DISK_LIVE 0 - struct nvme_ns __rcu *current_path[]; - #endif - }; --- -2.25.1 - diff --git a/queue-5.4/nvme-multipath-set-bdi-capabilities-once.patch b/queue-5.4/nvme-multipath-set-bdi-capabilities-once.patch deleted file mode 100644 index 641e4c04f83..00000000000 --- a/queue-5.4/nvme-multipath-set-bdi-capabilities-once.patch +++ /dev/null @@ -1,51 +0,0 @@ -From c260fa891f7a213e9260b4cda7a21bda02e0f42a Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 9 Apr 2020 09:09:04 -0700 -Subject: nvme-multipath: set bdi capabilities once - -From: Keith Busch - -[ Upstream commit b2ce4d90690bd29ce5b554e203cd03682dd59697 ] - -The queues' backing device info capabilities don't change with each -namespace revalidation. Set it only when each path's request_queue -is initially added to a multipath queue. - -Signed-off-by: Keith Busch -Reviewed-by: Sagi Grimberg -Signed-off-by: Christoph Hellwig -Signed-off-by: Jens Axboe -Signed-off-by: Sasha Levin ---- - drivers/nvme/host/multipath.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c -index 56caddeabb5e5..c17cf8f00f536 100644 ---- a/drivers/nvme/host/multipath.c -+++ b/drivers/nvme/host/multipath.c -@@ -3,6 +3,7 @@ - * Copyright (c) 2017-2018 Christoph Hellwig. - */ - -+#include - #include - #include - #include "nvme.h" -@@ -666,6 +667,13 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) - nvme_mpath_set_live(ns); - mutex_unlock(&ns->head->lock); - } -+ -+ if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { -+ struct backing_dev_info *info = -+ ns->head->disk->queue->backing_dev_info; -+ -+ info->capabilities |= BDI_CAP_STABLE_WRITES; -+ } - } - - void nvme_mpath_remove_disk(struct nvme_ns_head *head) --- -2.25.1 - diff --git a/queue-5.4/series b/queue-5.4/series index 3241e3f800e..05335e04261 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -101,11 +101,7 @@ arm-imx5-add-missing-put_device-call-in-imx_suspend_.patch scsi-lpfc-avoid-another-null-dereference-in-lpfc_sli.patch usb-gadget-udc-potential-oops-in-error-handling-code.patch usb-renesas_usbhs-getting-residue-from-callback_resu.patch -nvme-multipath-set-bdi-capabilities-once.patch -nvme-fix-possible-deadlock-when-i-o-is-blocked.patch -nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch nvme-don-t-protect-ns-mutation-with-ns-head-lock.patch -nvme-multipath-fix-deadlock-due-to-head-lock.patch netfilter-ipset-fix-unaligned-atomic-access.patch net-bcmgenet-use-hardware-padding-of-runt-frames.patch clk-sifive-allocate-sufficient-memory-for-struct-__p.patch diff --git a/queue-5.7/nvme-fix-possible-deadlock-when-i-o-is-blocked.patch b/queue-5.7/nvme-fix-possible-deadlock-when-i-o-is-blocked.patch deleted file mode 100644 index 6dc24624694..00000000000 --- a/queue-5.7/nvme-fix-possible-deadlock-when-i-o-is-blocked.patch +++ /dev/null @@ -1,124 +0,0 @@ -From 244efd6aebd7ac7e9a6c4865f07b8737ce3bfadd Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Wed, 24 Jun 2020 01:53:08 -0700 -Subject: nvme: fix possible deadlock when I/O is blocked - -From: Sagi Grimberg - -[ Upstream commit 3b4b19721ec652ad2c4fe51dfbe5124212b5f581 ] - -Revert fab7772bfbcf ("nvme-multipath: revalidate nvme_ns_head gendisk -in nvme_validate_ns") - -When adding a new namespace to the head disk (via nvme_mpath_set_live) -we will see partition scan which triggers I/O on the mpath device node. -This process will usually be triggered from the scan_work which holds -the scan_lock. If I/O blocks (if we got ana change currently have only -available paths but none are accessible) this can deadlock on the head -disk bd_mutex as both partition scan I/O takes it, and head disk revalidation -takes it to check for resize (also triggered from scan_work on a different -path). See trace [1]. - -The mpath disk revalidation was originally added to detect online disk -size change, but this is no longer needed since commit cb224c3af4df -("nvme: Convert to use set_capacity_revalidate_and_notify") which already -updates resize info without unnecessarily revalidating the disk (the -mpath disk doesn't even implement .revalidate_disk fop). - -[1]: --- -kernel: INFO: task kworker/u65:9:494 blocked for more than 241 seconds. -kernel: Tainted: G OE 5.3.5-050305-generic #201910071830 -kernel: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. -kernel: kworker/u65:9 D 0 494 2 0x80004000 -kernel: Workqueue: nvme-wq nvme_scan_work [nvme_core] -kernel: Call Trace: -kernel: __schedule+0x2b9/0x6c0 -kernel: schedule+0x42/0xb0 -kernel: schedule_preempt_disabled+0xe/0x10 -kernel: __mutex_lock.isra.0+0x182/0x4f0 -kernel: __mutex_lock_slowpath+0x13/0x20 -kernel: mutex_lock+0x2e/0x40 -kernel: revalidate_disk+0x63/0xa0 -kernel: __nvme_revalidate_disk+0xfe/0x110 [nvme_core] -kernel: nvme_revalidate_disk+0xa4/0x160 [nvme_core] -kernel: ? evict+0x14c/0x1b0 -kernel: revalidate_disk+0x2b/0xa0 -kernel: nvme_validate_ns+0x49/0x940 [nvme_core] -kernel: ? blk_mq_free_request+0xd2/0x100 -kernel: ? __nvme_submit_sync_cmd+0xbe/0x1e0 [nvme_core] -kernel: nvme_scan_work+0x24f/0x380 [nvme_core] -kernel: process_one_work+0x1db/0x380 -kernel: worker_thread+0x249/0x400 -kernel: kthread+0x104/0x140 -kernel: ? process_one_work+0x380/0x380 -kernel: ? kthread_park+0x80/0x80 -kernel: ret_from_fork+0x1f/0x40 -... -kernel: INFO: task kworker/u65:1:2630 blocked for more than 241 seconds. -kernel: Tainted: G OE 5.3.5-050305-generic #201910071830 -kernel: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. -kernel: kworker/u65:1 D 0 2630 2 0x80004000 -kernel: Workqueue: nvme-wq nvme_scan_work [nvme_core] -kernel: Call Trace: -kernel: __schedule+0x2b9/0x6c0 -kernel: schedule+0x42/0xb0 -kernel: io_schedule+0x16/0x40 -kernel: do_read_cache_page+0x438/0x830 -kernel: ? __switch_to_asm+0x34/0x70 -kernel: ? file_fdatawait_range+0x30/0x30 -kernel: read_cache_page+0x12/0x20 -kernel: read_dev_sector+0x27/0xc0 -kernel: read_lba+0xc1/0x220 -kernel: ? kmem_cache_alloc_trace+0x19c/0x230 -kernel: efi_partition+0x1e6/0x708 -kernel: ? vsnprintf+0x39e/0x4e0 -kernel: ? snprintf+0x49/0x60 -kernel: check_partition+0x154/0x244 -kernel: rescan_partitions+0xae/0x280 -kernel: __blkdev_get+0x40f/0x560 -kernel: blkdev_get+0x3d/0x140 -kernel: __device_add_disk+0x388/0x480 -kernel: device_add_disk+0x13/0x20 -kernel: nvme_mpath_set_live+0x119/0x140 [nvme_core] -kernel: nvme_update_ns_ana_state+0x5c/0x60 [nvme_core] -kernel: nvme_set_ns_ana_state+0x1e/0x30 [nvme_core] -kernel: nvme_parse_ana_log+0xa1/0x180 [nvme_core] -kernel: ? nvme_update_ns_ana_state+0x60/0x60 [nvme_core] -kernel: nvme_mpath_add_disk+0x47/0x90 [nvme_core] -kernel: nvme_validate_ns+0x396/0x940 [nvme_core] -kernel: ? blk_mq_free_request+0xd2/0x100 -kernel: nvme_scan_work+0x24f/0x380 [nvme_core] -kernel: process_one_work+0x1db/0x380 -kernel: worker_thread+0x249/0x400 -kernel: kthread+0x104/0x140 -kernel: ? process_one_work+0x380/0x380 -kernel: ? kthread_park+0x80/0x80 -kernel: ret_from_fork+0x1f/0x40 --- - -Fixes: fab7772bfbcf ("nvme-multipath: revalidate nvme_ns_head gendisk -in nvme_validate_ns") -Signed-off-by: Anton Eidelman -Signed-off-by: Sagi Grimberg -Signed-off-by: Christoph Hellwig -Signed-off-by: Sasha Levin ---- - drivers/nvme/host/core.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c -index 887139f8fa53b..85ce6c682849e 100644 ---- a/drivers/nvme/host/core.c -+++ b/drivers/nvme/host/core.c -@@ -1910,7 +1910,6 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) - if (ns->head->disk) { - nvme_update_disk_info(ns->head->disk, ns, id); - blk_queue_stack_limits(ns->head->disk->queue, ns->queue); -- revalidate_disk(ns->head->disk); - } - #endif - } --- -2.25.1 - diff --git a/queue-5.7/nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch b/queue-5.7/nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch deleted file mode 100644 index 983e5debcc4..00000000000 --- a/queue-5.7/nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 1e2122e82988e8b94d08f84e5242ef3f414e2bcc Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Wed, 24 Jun 2020 01:53:09 -0700 -Subject: nvme-multipath: fix deadlock between ana_work and scan_work - -From: Anton Eidelman - -[ Upstream commit 489dd102a2c7c94d783a35f9412eb085b8da1aa4 ] - -When scan_work calls nvme_mpath_add_disk() this holds ana_lock -and invokes nvme_parse_ana_log(), which may issue IO -in device_add_disk() and hang waiting for an accessible path. -While nvme_mpath_set_live() only called when nvme_state_is_live(), -a transition may cause NVME_SC_ANA_TRANSITION and requeue the IO. - -In order to recover and complete the IO ana_work on the same ctrl -should be able to update the path state and remove NVME_NS_ANA_PENDING. - -The deadlock occurs because scan_work keeps holding ana_lock, -so ana_work hangs [1]. - -Fix: -Now nvme_mpath_add_disk() uses nvme_parse_ana_log() to obtain a copy -of the ANA group desc, and then calls nvme_update_ns_ana_state() without -holding ana_lock. - -[1]: -kernel: Workqueue: nvme-wq nvme_scan_work [nvme_core] -kernel: Call Trace: -kernel: __schedule+0x2b9/0x6c0 -kernel: schedule+0x42/0xb0 -kernel: io_schedule+0x16/0x40 -kernel: do_read_cache_page+0x438/0x830 -kernel: read_cache_page+0x12/0x20 -kernel: read_dev_sector+0x27/0xc0 -kernel: read_lba+0xc1/0x220 -kernel: efi_partition+0x1e6/0x708 -kernel: check_partition+0x154/0x244 -kernel: rescan_partitions+0xae/0x280 -kernel: __blkdev_get+0x40f/0x560 -kernel: blkdev_get+0x3d/0x140 -kernel: __device_add_disk+0x388/0x480 -kernel: device_add_disk+0x13/0x20 -kernel: nvme_mpath_set_live+0x119/0x140 [nvme_core] -kernel: nvme_update_ns_ana_state+0x5c/0x60 [nvme_core] -kernel: nvme_set_ns_ana_state+0x1e/0x30 [nvme_core] -kernel: nvme_parse_ana_log+0xa1/0x180 [nvme_core] -kernel: nvme_mpath_add_disk+0x47/0x90 [nvme_core] -kernel: nvme_validate_ns+0x396/0x940 [nvme_core] -kernel: nvme_scan_work+0x24f/0x380 [nvme_core] -kernel: process_one_work+0x1db/0x380 -kernel: worker_thread+0x249/0x400 -kernel: kthread+0x104/0x140 - -kernel: Workqueue: nvme-wq nvme_ana_work [nvme_core] -kernel: Call Trace: -kernel: __schedule+0x2b9/0x6c0 -kernel: schedule+0x42/0xb0 -kernel: schedule_preempt_disabled+0xe/0x10 -kernel: __mutex_lock.isra.0+0x182/0x4f0 -kernel: ? __switch_to_asm+0x34/0x70 -kernel: ? select_task_rq_fair+0x1aa/0x5c0 -kernel: ? kvm_sched_clock_read+0x11/0x20 -kernel: ? sched_clock+0x9/0x10 -kernel: __mutex_lock_slowpath+0x13/0x20 -kernel: mutex_lock+0x2e/0x40 -kernel: nvme_read_ana_log+0x3a/0x100 [nvme_core] -kernel: nvme_ana_work+0x15/0x20 [nvme_core] -kernel: process_one_work+0x1db/0x380 -kernel: worker_thread+0x4d/0x400 -kernel: kthread+0x104/0x140 -kernel: ? process_one_work+0x380/0x380 -kernel: ? kthread_park+0x80/0x80 -kernel: ret_from_fork+0x35/0x40 - -Fixes: 0d0b660f214d ("nvme: add ANA support") -Signed-off-by: Anton Eidelman -Signed-off-by: Sagi Grimberg -Signed-off-by: Christoph Hellwig -Signed-off-by: Sasha Levin ---- - drivers/nvme/host/multipath.c | 24 ++++++++++++++++-------- - 1 file changed, 16 insertions(+), 8 deletions(-) - -diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c -index 9f2844935fdfa..fece4654fa3e7 100644 ---- a/drivers/nvme/host/multipath.c -+++ b/drivers/nvme/host/multipath.c -@@ -641,26 +641,34 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr, - } - DEVICE_ATTR_RO(ana_state); - --static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl, -+static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl, - struct nvme_ana_group_desc *desc, void *data) - { -- struct nvme_ns *ns = data; -+ struct nvme_ana_group_desc *dst = data; - -- if (ns->ana_grpid == le32_to_cpu(desc->grpid)) { -- nvme_update_ns_ana_state(desc, ns); -- return -ENXIO; /* just break out of the loop */ -- } -+ if (desc->grpid != dst->grpid) -+ return 0; - -- return 0; -+ *dst = *desc; -+ return -ENXIO; /* just break out of the loop */ - } - - void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) - { - if (nvme_ctrl_use_ana(ns->ctrl)) { -+ struct nvme_ana_group_desc desc = { -+ .grpid = id->anagrpid, -+ .state = 0, -+ }; -+ - mutex_lock(&ns->ctrl->ana_lock); - ns->ana_grpid = le32_to_cpu(id->anagrpid); -- nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state); -+ nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc); - mutex_unlock(&ns->ctrl->ana_lock); -+ if (desc.state) { -+ /* found the group desc: update */ -+ nvme_update_ns_ana_state(&desc, ns); -+ } - } else { - mutex_lock(&ns->head->lock); - ns->ana_state = NVME_ANA_OPTIMIZED; --- -2.25.1 - diff --git a/queue-5.7/nvme-multipath-fix-deadlock-due-to-head-lock.patch b/queue-5.7/nvme-multipath-fix-deadlock-due-to-head-lock.patch deleted file mode 100644 index 332ff9e48d0..00000000000 --- a/queue-5.7/nvme-multipath-fix-deadlock-due-to-head-lock.patch +++ /dev/null @@ -1,124 +0,0 @@ -From b7801a8513594e3da5ec2435e0267919e99faf5d Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Wed, 24 Jun 2020 01:53:11 -0700 -Subject: nvme-multipath: fix deadlock due to head->lock - -From: Anton Eidelman - -[ Upstream commit d8a22f85609fadb46ba699e0136cc3ebdeebff79 ] - -In the following scenario scan_work and ana_work will deadlock: - -When scan_work calls nvme_mpath_add_disk() this holds ana_lock -and invokes nvme_parse_ana_log(), which may issue IO -in device_add_disk() and hang waiting for an accessible path. - -While nvme_mpath_set_live() only called when nvme_state_is_live(), -a transition may cause NVME_SC_ANA_TRANSITION and requeue the IO. - -Since nvme_mpath_set_live() holds ns->head->lock, an ana_work on -ANY ctrl will not be able to complete nvme_mpath_set_live() -on the same ns->head, which is required in order to update -the new accessible path and remove NVME_NS_ANA_PENDING.. -Therefore IO never completes: deadlock [1]. - -Fix: -Move device_add_disk out of the head->lock and protect it with an -atomic test_and_set for a new NVME_NS_HEAD_HAS_DISK bit. - -[1]: -kernel: INFO: task kworker/u8:2:160 blocked for more than 120 seconds. -kernel: Tainted: G OE 5.3.5-050305-generic #201910071830 -kernel: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. -kernel: kworker/u8:2 D 0 160 2 0x80004000 -kernel: Workqueue: nvme-wq nvme_ana_work [nvme_core] -kernel: Call Trace: -kernel: __schedule+0x2b9/0x6c0 -kernel: schedule+0x42/0xb0 -kernel: schedule_preempt_disabled+0xe/0x10 -kernel: __mutex_lock.isra.0+0x182/0x4f0 -kernel: __mutex_lock_slowpath+0x13/0x20 -kernel: mutex_lock+0x2e/0x40 -kernel: nvme_update_ns_ana_state+0x22/0x60 [nvme_core] -kernel: nvme_update_ana_state+0xca/0xe0 [nvme_core] -kernel: nvme_parse_ana_log+0xa1/0x180 [nvme_core] -kernel: nvme_read_ana_log+0x76/0x100 [nvme_core] -kernel: nvme_ana_work+0x15/0x20 [nvme_core] -kernel: process_one_work+0x1db/0x380 -kernel: worker_thread+0x4d/0x400 -kernel: kthread+0x104/0x140 -kernel: ret_from_fork+0x35/0x40 -kernel: INFO: task kworker/u8:4:439 blocked for more than 120 seconds. -kernel: Tainted: G OE 5.3.5-050305-generic #201910071830 -kernel: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. -kernel: kworker/u8:4 D 0 439 2 0x80004000 -kernel: Workqueue: nvme-wq nvme_scan_work [nvme_core] -kernel: Call Trace: -kernel: __schedule+0x2b9/0x6c0 -kernel: schedule+0x42/0xb0 -kernel: io_schedule+0x16/0x40 -kernel: do_read_cache_page+0x438/0x830 -kernel: read_cache_page+0x12/0x20 -kernel: read_dev_sector+0x27/0xc0 -kernel: read_lba+0xc1/0x220 -kernel: efi_partition+0x1e6/0x708 -kernel: check_partition+0x154/0x244 -kernel: rescan_partitions+0xae/0x280 -kernel: __blkdev_get+0x40f/0x560 -kernel: blkdev_get+0x3d/0x140 -kernel: __device_add_disk+0x388/0x480 -kernel: device_add_disk+0x13/0x20 -kernel: nvme_mpath_set_live+0x119/0x140 [nvme_core] -kernel: nvme_update_ns_ana_state+0x5c/0x60 [nvme_core] -kernel: nvme_mpath_add_disk+0xbe/0x100 [nvme_core] -kernel: nvme_validate_ns+0x396/0x940 [nvme_core] -kernel: nvme_scan_work+0x256/0x390 [nvme_core] -kernel: process_one_work+0x1db/0x380 -kernel: worker_thread+0x4d/0x400 -kernel: kthread+0x104/0x140 -kernel: ret_from_fork+0x35/0x40 - -Fixes: 0d0b660f214d ("nvme: add ANA support") -Signed-off-by: Anton Eidelman -Signed-off-by: Sagi Grimberg -Signed-off-by: Christoph Hellwig -Signed-off-by: Sasha Levin ---- - drivers/nvme/host/multipath.c | 4 ++-- - drivers/nvme/host/nvme.h | 2 ++ - 2 files changed, 4 insertions(+), 2 deletions(-) - -diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c -index f4287d8550a9f..d1cb65698288b 100644 ---- a/drivers/nvme/host/multipath.c -+++ b/drivers/nvme/host/multipath.c -@@ -413,11 +413,11 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) - if (!head->disk) - return; - -- mutex_lock(&head->lock); -- if (!(head->disk->flags & GENHD_FL_UP)) -+ if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) - device_add_disk(&head->subsys->dev, head->disk, - nvme_ns_id_attr_groups); - -+ mutex_lock(&head->lock); - if (nvme_path_is_optimized(ns)) { - int node, srcu_idx; - -diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h -index 2e04a36296d95..719342600be62 100644 ---- a/drivers/nvme/host/nvme.h -+++ b/drivers/nvme/host/nvme.h -@@ -359,6 +359,8 @@ struct nvme_ns_head { - spinlock_t requeue_lock; - struct work_struct requeue_work; - struct mutex lock; -+ unsigned long flags; -+#define NVME_NSHEAD_DISK_LIVE 0 - struct nvme_ns __rcu *current_path[]; - #endif - }; --- -2.25.1 - diff --git a/queue-5.7/nvme-multipath-set-bdi-capabilities-once.patch b/queue-5.7/nvme-multipath-set-bdi-capabilities-once.patch deleted file mode 100644 index fe022f125cb..00000000000 --- a/queue-5.7/nvme-multipath-set-bdi-capabilities-once.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 918ae436c112962b408d439e0a82ab2dd7f645d6 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 9 Apr 2020 09:09:04 -0700 -Subject: nvme-multipath: set bdi capabilities once - -From: Keith Busch - -[ Upstream commit b2ce4d90690bd29ce5b554e203cd03682dd59697 ] - -The queues' backing device info capabilities don't change with each -namespace revalidation. Set it only when each path's request_queue -is initially added to a multipath queue. - -Signed-off-by: Keith Busch -Reviewed-by: Sagi Grimberg -Signed-off-by: Christoph Hellwig -Signed-off-by: Jens Axboe -Signed-off-by: Sasha Levin ---- - drivers/nvme/host/core.c | 7 ------- - drivers/nvme/host/multipath.c | 8 ++++++++ - 2 files changed, 8 insertions(+), 7 deletions(-) - -diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c -index 7b4cbe2c69541..887139f8fa53b 100644 ---- a/drivers/nvme/host/core.c -+++ b/drivers/nvme/host/core.c -@@ -1910,13 +1910,6 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) - if (ns->head->disk) { - nvme_update_disk_info(ns->head->disk, ns, id); - blk_queue_stack_limits(ns->head->disk->queue, ns->queue); -- if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { -- struct backing_dev_info *info = -- ns->head->disk->queue->backing_dev_info; -- -- info->capabilities |= BDI_CAP_STABLE_WRITES; -- } -- - revalidate_disk(ns->head->disk); - } - #endif -diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c -index 54603bd3e02de..9f2844935fdfa 100644 ---- a/drivers/nvme/host/multipath.c -+++ b/drivers/nvme/host/multipath.c -@@ -3,6 +3,7 @@ - * Copyright (c) 2017-2018 Christoph Hellwig. - */ - -+#include - #include - #include - #include "nvme.h" -@@ -666,6 +667,13 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) - nvme_mpath_set_live(ns); - mutex_unlock(&ns->head->lock); - } -+ -+ if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { -+ struct backing_dev_info *info = -+ ns->head->disk->queue->backing_dev_info; -+ -+ info->capabilities |= BDI_CAP_STABLE_WRITES; -+ } - } - - void nvme_mpath_remove_disk(struct nvme_ns_head *head) --- -2.25.1 - diff --git a/queue-5.7/series b/queue-5.7/series index ce9b454bfaf..d2384c4f28d 100644 --- a/queue-5.7/series +++ b/queue-5.7/series @@ -158,11 +158,7 @@ arm-imx5-add-missing-put_device-call-in-imx_suspend_.patch scsi-lpfc-avoid-another-null-dereference-in-lpfc_sli.patch usb-gadget-udc-potential-oops-in-error-handling-code.patch usb-renesas_usbhs-getting-residue-from-callback_resu.patch -nvme-multipath-set-bdi-capabilities-once.patch -nvme-fix-possible-deadlock-when-i-o-is-blocked.patch -nvme-multipath-fix-deadlock-between-ana_work-and-sca.patch nvme-don-t-protect-ns-mutation-with-ns-head-lock.patch -nvme-multipath-fix-deadlock-due-to-head-lock.patch qed-add-missing-error-test-for-dbg_status_no_matchin.patch netfilter-ipset-fix-unaligned-atomic-access.patch net-bcmgenet-use-hardware-padding-of-runt-frames.patch -- 2.47.3