]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 22 Apr 2018 13:29:16 +0000 (15:29 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 22 Apr 2018 13:29:16 +0000 (15:29 +0200)
added patches:
block-mq-fix-potential-deadlock-during-cpu-hotplug.patch

queue-4.9/block-mq-fix-potential-deadlock-during-cpu-hotplug.patch [new file with mode: 0644]
queue-4.9/series

diff --git a/queue-4.9/block-mq-fix-potential-deadlock-during-cpu-hotplug.patch b/queue-4.9/block-mq-fix-potential-deadlock-during-cpu-hotplug.patch
new file mode 100644 (file)
index 0000000..78e2449
--- /dev/null
@@ -0,0 +1,170 @@
+From 51d638b1f56a0bfd9219800620994794a1a2b219 Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+Date: Sun, 7 May 2017 00:14:22 -0700
+Subject: block/mq: fix potential deadlock during cpu hotplug
+
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+
+commit 51d638b1f56a0bfd9219800620994794a1a2b219 upstream.
+
+This can be triggered by hot-unplugging one cpu.
+
+======================================================
+ [ INFO: possible circular locking dependency detected ]
+ 4.11.0+ #17 Not tainted
+ -------------------------------------------------------
+ step_after_susp/2640 is trying to acquire lock:
+  (all_q_mutex){+.+...}, at: [<ffffffffb33f95b8>] blk_mq_queue_reinit_work+0x18/0x110
+
+ but task is already holding lock:
+  (cpu_hotplug.lock){+.+.+.}, at: [<ffffffffb306d04f>] cpu_hotplug_begin+0x7f/0xe0
+
+ which lock already depends on the new lock.
+
+ the existing dependency chain (in reverse order) is:
+
+ -> #1 (cpu_hotplug.lock){+.+.+.}:
+        lock_acquire+0x11c/0x230
+        __mutex_lock+0x92/0x990
+        mutex_lock_nested+0x1b/0x20
+        get_online_cpus+0x64/0x80
+        blk_mq_init_allocated_queue+0x3a0/0x4e0
+        blk_mq_init_queue+0x3a/0x60
+        loop_add+0xe5/0x280
+        loop_init+0x124/0x177
+        do_one_initcall+0x53/0x1c0
+        kernel_init_freeable+0x1e3/0x27f
+        kernel_init+0xe/0x100
+        ret_from_fork+0x31/0x40
+
+ -> #0 (all_q_mutex){+.+...}:
+        __lock_acquire+0x189a/0x18a0
+        lock_acquire+0x11c/0x230
+        __mutex_lock+0x92/0x990
+        mutex_lock_nested+0x1b/0x20
+        blk_mq_queue_reinit_work+0x18/0x110
+        blk_mq_queue_reinit_dead+0x1c/0x20
+        cpuhp_invoke_callback+0x1f2/0x810
+        cpuhp_down_callbacks+0x42/0x80
+        _cpu_down+0xb2/0xe0
+        freeze_secondary_cpus+0xb6/0x390
+        suspend_devices_and_enter+0x3b3/0xa40
+        pm_suspend+0x129/0x490
+        state_store+0x82/0xf0
+        kobj_attr_store+0xf/0x20
+        sysfs_kf_write+0x45/0x60
+        kernfs_fop_write+0x135/0x1c0
+        __vfs_write+0x37/0x160
+        vfs_write+0xcd/0x1d0
+        SyS_write+0x58/0xc0
+        do_syscall_64+0x8f/0x710
+        return_from_SYSCALL_64+0x0/0x7a
+
+ other info that might help us debug this:
+
+  Possible unsafe locking scenario:
+
+        CPU0                    CPU1
+        ----                    ----
+   lock(cpu_hotplug.lock);
+                                lock(all_q_mutex);
+                                lock(cpu_hotplug.lock);
+   lock(all_q_mutex);
+
+  *** DEADLOCK ***
+
+ 8 locks held by step_after_susp/2640:
+  #0:  (sb_writers#6){.+.+.+}, at: [<ffffffffb3244aed>] vfs_write+0x1ad/0x1d0
+  #1:  (&of->mutex){+.+.+.}, at: [<ffffffffb32d3a51>] kernfs_fop_write+0x101/0x1c0
+  #2:  (s_active#166){.+.+.+}, at: [<ffffffffb32d3a59>] kernfs_fop_write+0x109/0x1c0
+  #3:  (pm_mutex){+.+...}, at: [<ffffffffb30d2ecd>] pm_suspend+0x21d/0x490
+  #4:  (acpi_scan_lock){+.+.+.}, at: [<ffffffffb34dc3d7>] acpi_scan_lock_acquire+0x17/0x20
+  #5:  (cpu_add_remove_lock){+.+.+.}, at: [<ffffffffb306d6d7>] freeze_secondary_cpus+0x27/0x390
+  #6:  (cpu_hotplug.dep_map){++++++}, at: [<ffffffffb306cfd5>] cpu_hotplug_begin+0x5/0xe0
+  #7:  (cpu_hotplug.lock){+.+.+.}, at: [<ffffffffb306d04f>] cpu_hotplug_begin+0x7f/0xe0
+
+ stack backtrace:
+ CPU: 3 PID: 2640 Comm: step_after_susp Not tainted 4.11.0+ #17
+ Hardware name: Dell Inc. OptiPlex 7040/0JCTF8, BIOS 1.4.9 09/12/2016
+ Call Trace:
+  dump_stack+0x99/0xce
+  print_circular_bug+0x1fa/0x270
+  __lock_acquire+0x189a/0x18a0
+  lock_acquire+0x11c/0x230
+  ? lock_acquire+0x11c/0x230
+  ? blk_mq_queue_reinit_work+0x18/0x110
+  ? blk_mq_queue_reinit_work+0x18/0x110
+  __mutex_lock+0x92/0x990
+  ? blk_mq_queue_reinit_work+0x18/0x110
+  ? kmem_cache_free+0x2cb/0x330
+  ? anon_transport_class_unregister+0x20/0x20
+  ? blk_mq_queue_reinit_work+0x110/0x110
+  mutex_lock_nested+0x1b/0x20
+  ? mutex_lock_nested+0x1b/0x20
+  blk_mq_queue_reinit_work+0x18/0x110
+  blk_mq_queue_reinit_dead+0x1c/0x20
+  cpuhp_invoke_callback+0x1f2/0x810
+  ? __flow_cache_shrink+0x160/0x160
+  cpuhp_down_callbacks+0x42/0x80
+  _cpu_down+0xb2/0xe0
+  freeze_secondary_cpus+0xb6/0x390
+  suspend_devices_and_enter+0x3b3/0xa40
+  ? rcu_read_lock_sched_held+0x79/0x80
+  pm_suspend+0x129/0x490
+  state_store+0x82/0xf0
+  kobj_attr_store+0xf/0x20
+  sysfs_kf_write+0x45/0x60
+  kernfs_fop_write+0x135/0x1c0
+  __vfs_write+0x37/0x160
+  ? rcu_read_lock_sched_held+0x79/0x80
+  ? rcu_sync_lockdep_assert+0x2f/0x60
+  ? __sb_start_write+0xd9/0x1c0
+  ? vfs_write+0x1ad/0x1d0
+  vfs_write+0xcd/0x1d0
+  SyS_write+0x58/0xc0
+  ? rcu_read_lock_sched_held+0x79/0x80
+  do_syscall_64+0x8f/0x710
+  ? trace_hardirqs_on_thunk+0x1a/0x1c
+  entry_SYSCALL64_slow_path+0x25/0x25
+
+The cpu hotplug path holds cpu_hotplug.lock and then reinitializes all existing
+blk-mq queues under all_q_mutex; however, blk_mq_init_allocated_queue() takes
+these two locks in the inverse order. This is due to commit eabe06595d62
+(blk/mq: Cure cpu hotplug lock inversion), which fixed a cpu hotplug lock inversion
+issue introduced by the hotplug rework. However, the hotplug rework is still
+work-in-progress and lives in a -tip branch, so mainline cannot yet trigger that
+splat. That commit breaks Linus's tree in the merge window, so this patch reverts
+the lock order to avoid the splat in Linus's tree.
+
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Cc: Thierry Escande <thierry.escande@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/blk-mq.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/block/blk-mq.c
++++ b/block/blk-mq.c
+@@ -2019,15 +2019,15 @@ struct request_queue *blk_mq_init_alloca
+       blk_mq_init_cpu_queues(q, set->nr_hw_queues);
+-      mutex_lock(&all_q_mutex);
+       get_online_cpus();
++      mutex_lock(&all_q_mutex);
+       list_add_tail(&q->all_q_node, &all_q_list);
+       blk_mq_add_queue_tag_set(set, q);
+       blk_mq_map_swqueue(q, cpu_online_mask);
+-      put_online_cpus();
+       mutex_unlock(&all_q_mutex);
++      put_online_cpus();
+       return q;
index ef246460168fc85a91640524c60a50f90141f2da..da11c9d71b3e5623d1ef7439e74ffac5162bd817 100644 (file)
@@ -92,3 +92,4 @@ autofs-mount-point-create-should-honour-passed-in-mode.patch
 mm-filemap.c-fix-null-pointer-in-page_cache_tree_insert.patch
 fanotify-fix-logic-of-events-on-child.patch
 writeback-safer-lock-nesting.patch
+block-mq-fix-potential-deadlock-during-cpu-hotplug.patch