From 43414dab200567f212ef99f97e63a954546e6970 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 22 Apr 2018 15:29:16 +0200 Subject: [PATCH] 4.9-stable patches added patches: block-mq-fix-potential-deadlock-during-cpu-hotplug.patch --- ...otential-deadlock-during-cpu-hotplug.patch | 170 ++++++++++++++++++ queue-4.9/series | 1 + 2 files changed, 171 insertions(+) create mode 100644 queue-4.9/block-mq-fix-potential-deadlock-during-cpu-hotplug.patch diff --git a/queue-4.9/block-mq-fix-potential-deadlock-during-cpu-hotplug.patch b/queue-4.9/block-mq-fix-potential-deadlock-during-cpu-hotplug.patch new file mode 100644 index 00000000000..78e24499bbb --- /dev/null +++ b/queue-4.9/block-mq-fix-potential-deadlock-during-cpu-hotplug.patch @@ -0,0 +1,170 @@ +From 51d638b1f56a0bfd9219800620994794a1a2b219 Mon Sep 17 00:00:00 2001 +From: Wanpeng Li +Date: Sun, 7 May 2017 00:14:22 -0700 +Subject: block/mq: fix potential deadlock during cpu hotplug + +From: Wanpeng Li + +commit 51d638b1f56a0bfd9219800620994794a1a2b219 upstream. + +This can be triggered by hot-unplug one cpu. + +====================================================== + [ INFO: possible circular locking dependency detected ] + 4.11.0+ #17 Not tainted + ------------------------------------------------------- + step_after_susp/2640 is trying to acquire lock: + (all_q_mutex){+.+...}, at: [] blk_mq_queue_reinit_work+0x18/0x110 + + but task is already holding lock: + (cpu_hotplug.lock){+.+.+.}, at: [] cpu_hotplug_begin+0x7f/0xe0 + + which lock already depends on the new lock. 
+ + the existing dependency chain (in reverse order) is: + + -> #1 (cpu_hotplug.lock){+.+.+.}: + lock_acquire+0x11c/0x230 + __mutex_lock+0x92/0x990 + mutex_lock_nested+0x1b/0x20 + get_online_cpus+0x64/0x80 + blk_mq_init_allocated_queue+0x3a0/0x4e0 + blk_mq_init_queue+0x3a/0x60 + loop_add+0xe5/0x280 + loop_init+0x124/0x177 + do_one_initcall+0x53/0x1c0 + kernel_init_freeable+0x1e3/0x27f + kernel_init+0xe/0x100 + ret_from_fork+0x31/0x40 + + -> #0 (all_q_mutex){+.+...}: + __lock_acquire+0x189a/0x18a0 + lock_acquire+0x11c/0x230 + __mutex_lock+0x92/0x990 + mutex_lock_nested+0x1b/0x20 + blk_mq_queue_reinit_work+0x18/0x110 + blk_mq_queue_reinit_dead+0x1c/0x20 + cpuhp_invoke_callback+0x1f2/0x810 + cpuhp_down_callbacks+0x42/0x80 + _cpu_down+0xb2/0xe0 + freeze_secondary_cpus+0xb6/0x390 + suspend_devices_and_enter+0x3b3/0xa40 + pm_suspend+0x129/0x490 + state_store+0x82/0xf0 + kobj_attr_store+0xf/0x20 + sysfs_kf_write+0x45/0x60 + kernfs_fop_write+0x135/0x1c0 + __vfs_write+0x37/0x160 + vfs_write+0xcd/0x1d0 + SyS_write+0x58/0xc0 + do_syscall_64+0x8f/0x710 + return_from_SYSCALL_64+0x0/0x7a + + other info that might help us debug this: + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(cpu_hotplug.lock); + lock(all_q_mutex); + lock(cpu_hotplug.lock); + lock(all_q_mutex); + + *** DEADLOCK *** + + 8 locks held by step_after_susp/2640: + #0: (sb_writers#6){.+.+.+}, at: [] vfs_write+0x1ad/0x1d0 + #1: (&of->mutex){+.+.+.}, at: [] kernfs_fop_write+0x101/0x1c0 + #2: (s_active#166){.+.+.+}, at: [] kernfs_fop_write+0x109/0x1c0 + #3: (pm_mutex){+.+...}, at: [] pm_suspend+0x21d/0x490 + #4: (acpi_scan_lock){+.+.+.}, at: [] acpi_scan_lock_acquire+0x17/0x20 + #5: (cpu_add_remove_lock){+.+.+.}, at: [] freeze_secondary_cpus+0x27/0x390 + #6: (cpu_hotplug.dep_map){++++++}, at: [] cpu_hotplug_begin+0x5/0xe0 + #7: (cpu_hotplug.lock){+.+.+.}, at: [] cpu_hotplug_begin+0x7f/0xe0 + + stack backtrace: + CPU: 3 PID: 2640 Comm: step_after_susp Not tainted 4.11.0+ #17 + Hardware name: Dell 
Inc. OptiPlex 7040/0JCTF8, BIOS 1.4.9 09/12/2016 + Call Trace: + dump_stack+0x99/0xce + print_circular_bug+0x1fa/0x270 + __lock_acquire+0x189a/0x18a0 + lock_acquire+0x11c/0x230 + ? lock_acquire+0x11c/0x230 + ? blk_mq_queue_reinit_work+0x18/0x110 + ? blk_mq_queue_reinit_work+0x18/0x110 + __mutex_lock+0x92/0x990 + ? blk_mq_queue_reinit_work+0x18/0x110 + ? kmem_cache_free+0x2cb/0x330 + ? anon_transport_class_unregister+0x20/0x20 + ? blk_mq_queue_reinit_work+0x110/0x110 + mutex_lock_nested+0x1b/0x20 + ? mutex_lock_nested+0x1b/0x20 + blk_mq_queue_reinit_work+0x18/0x110 + blk_mq_queue_reinit_dead+0x1c/0x20 + cpuhp_invoke_callback+0x1f2/0x810 + ? __flow_cache_shrink+0x160/0x160 + cpuhp_down_callbacks+0x42/0x80 + _cpu_down+0xb2/0xe0 + freeze_secondary_cpus+0xb6/0x390 + suspend_devices_and_enter+0x3b3/0xa40 + ? rcu_read_lock_sched_held+0x79/0x80 + pm_suspend+0x129/0x490 + state_store+0x82/0xf0 + kobj_attr_store+0xf/0x20 + sysfs_kf_write+0x45/0x60 + kernfs_fop_write+0x135/0x1c0 + __vfs_write+0x37/0x160 + ? rcu_read_lock_sched_held+0x79/0x80 + ? rcu_sync_lockdep_assert+0x2f/0x60 + ? __sb_start_write+0xd9/0x1c0 + ? vfs_write+0x1ad/0x1d0 + vfs_write+0xcd/0x1d0 + SyS_write+0x58/0xc0 + ? rcu_read_lock_sched_held+0x79/0x80 + do_syscall_64+0x8f/0x710 + ? trace_hardirqs_on_thunk+0x1a/0x1c + entry_SYSCALL64_slow_path+0x25/0x25 + +The cpu hotplug path will hold cpu_hotplug.lock and then reinit all existing +queues for blk mq w/ all_q_mutex, however, blk_mq_init_allocated_queue() will +contend these two locks in the inverse order. This is due to commit eabe06595d62 +(blk/mq: Cure cpu hotplug lock inversion), it fixes a cpu hotplug lock inversion +issue because of hotplug rework, however the hotplug rework is still work-in-progress +and lives in a -tip branch and mainline cannot yet trigger that splat. The commit +breaks Linus's tree in the merge window, so this patch reverts the lock order +and avoids the splat in Linus's tree. 
+ +Cc: Jens Axboe +Cc: Peter Zijlstra (Intel) +Cc: Thomas Gleixner +Signed-off-by: Wanpeng Li +Signed-off-by: Jens Axboe +Cc: Thierry Escande +Signed-off-by: Greg Kroah-Hartman + +--- + block/blk-mq.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -2019,15 +2019,15 @@ struct request_queue *blk_mq_init_alloca + + blk_mq_init_cpu_queues(q, set->nr_hw_queues); + +- mutex_lock(&all_q_mutex); + get_online_cpus(); ++ mutex_lock(&all_q_mutex); + + list_add_tail(&q->all_q_node, &all_q_list); + blk_mq_add_queue_tag_set(set, q); + blk_mq_map_swqueue(q, cpu_online_mask); + +- put_online_cpus(); + mutex_unlock(&all_q_mutex); ++ put_online_cpus(); + + return q; + diff --git a/queue-4.9/series b/queue-4.9/series index ef246460168..da11c9d71b3 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -92,3 +92,4 @@ autofs-mount-point-create-should-honour-passed-in-mode.patch mm-filemap.c-fix-null-pointer-in-page_cache_tree_insert.patch fanotify-fix-logic-of-events-on-child.patch writeback-safer-lock-nesting.patch +block-mq-fix-potential-deadlock-during-cpu-hotplug.patch -- 2.47.3