git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/3.16.3/ib-srp-fix-deadlock-between-host-removal-and-multipathd.patch
4.9-stable patches
[thirdparty/kernel/stable-queue.git] / releases / 3.16.3 / ib-srp-fix-deadlock-between-host-removal-and-multipathd.patch
1 From bcc05910359183b431da92713e98eed478edf83a Mon Sep 17 00:00:00 2001
2 From: Bart Van Assche <bvanassche@acm.org>
3 Date: Wed, 9 Jul 2014 15:57:26 +0200
4 Subject: IB/srp: Fix deadlock between host removal and multipathd
5
6 From: Bart Van Assche <bvanassche@acm.org>
7
8 commit bcc05910359183b431da92713e98eed478edf83a upstream.
9
10 If scsi_remove_host() is invoked after a SCSI device has been blocked,
11 if the fast_io_fail_tmo or dev_loss_tmo work gets scheduled on the
12 workqueue executing srp_remove_work(), and if an I/O request is
13 scheduled by e.g. multipathd after the SCSI device has been blocked,
14 then the following deadlock can occur:
15
16 kworker/6:1 D ffff880831f3c460 0 195 2 0x00000000
17 Call Trace:
18 [<ffffffff814aafd9>] schedule+0x29/0x70
19 [<ffffffff814aa0ef>] schedule_timeout+0x10f/0x2a0
20 [<ffffffff8105af6f>] msleep+0x2f/0x40
21 [<ffffffff8123b0ae>] __blk_drain_queue+0x4e/0x180
22 [<ffffffff8123d2d5>] blk_cleanup_queue+0x225/0x230
23 [<ffffffffa0010732>] __scsi_remove_device+0x62/0xe0 [scsi_mod]
24 [<ffffffffa000ed2f>] scsi_forget_host+0x6f/0x80 [scsi_mod]
25 [<ffffffffa0002eba>] scsi_remove_host+0x7a/0x130 [scsi_mod]
26 [<ffffffffa07cf5c5>] srp_remove_work+0x95/0x180 [ib_srp]
27 [<ffffffff8106d7aa>] process_one_work+0x1ea/0x6c0
28 [<ffffffff8106dd9b>] worker_thread+0x11b/0x3a0
29 [<ffffffff810758bd>] kthread+0xed/0x110
30 [<ffffffff814b972c>] ret_from_fork+0x7c/0xb0
31 multipathd D ffff880096acc460 0 5340 1 0x00000000
32 Call Trace:
33 [<ffffffff814aafd9>] schedule+0x29/0x70
34 [<ffffffff814aa0ef>] schedule_timeout+0x10f/0x2a0
35 [<ffffffff814ab79b>] io_schedule_timeout+0x9b/0xf0
36 [<ffffffff814abe1c>] wait_for_completion_io_timeout+0xdc/0x110
37 [<ffffffff81244b9b>] blk_execute_rq+0x9b/0x100
38 [<ffffffff8124f665>] sg_io+0x1a5/0x450
39 [<ffffffff8124fd21>] scsi_cmd_ioctl+0x2a1/0x430
40 [<ffffffff8124fef2>] scsi_cmd_blk_ioctl+0x42/0x50
41 [<ffffffffa00ec97e>] sd_ioctl+0xbe/0x140 [sd_mod]
42 [<ffffffff8124bd04>] blkdev_ioctl+0x234/0x840
43 [<ffffffff811cb491>] block_ioctl+0x41/0x50
44 [<ffffffff811a0df0>] do_vfs_ioctl+0x300/0x520
45 [<ffffffff811a1051>] SyS_ioctl+0x41/0x80
46 [<ffffffff814b9962>] tracesys+0xd0/0xd5
47
48 Fix this by scheduling removal work on a different workqueue than the
49 one used by the transport layer timers.
50
51 Signed-off-by: Bart Van Assche <bvanassche@acm.org>
52 Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
53 Reviewed-by: David Dillow <dave@thedillows.org>
54 Cc: Sebastian Parschauer <sebastian.riemer@profitbricks.com>
55 Signed-off-by: Roland Dreier <roland@purestorage.com>
56 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
57
58 ---
59 drivers/infiniband/ulp/srp/ib_srp.c | 38 ++++++++++++++++++++++++++----------
60 1 file changed, 28 insertions(+), 10 deletions(-)
61
62 --- a/drivers/infiniband/ulp/srp/ib_srp.c
63 +++ b/drivers/infiniband/ulp/srp/ib_srp.c
64 @@ -130,6 +130,7 @@ static void srp_send_completion(struct i
65 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
66
67 static struct scsi_transport_template *ib_srp_transport_template;
68 +static struct workqueue_struct *srp_remove_wq;
69
70 static struct ib_client srp_client = {
71 .name = "srp",
72 @@ -731,7 +732,7 @@ static bool srp_queue_remove_work(struct
73 spin_unlock_irq(&target->lock);
74
75 if (changed)
76 - queue_work(system_long_wq, &target->remove_work);
77 + queue_work(srp_remove_wq, &target->remove_work);
78
79 return changed;
80 }
81 @@ -3261,9 +3262,10 @@ static void srp_remove_one(struct ib_dev
82 spin_unlock(&host->target_lock);
83
84 /*
85 - * Wait for target port removal tasks.
86 + * Wait for tl_err and target port removal tasks.
87 */
88 flush_workqueue(system_long_wq);
89 + flush_workqueue(srp_remove_wq);
90
91 kfree(host);
92 }
93 @@ -3313,16 +3315,22 @@ static int __init srp_init_module(void)
94 indirect_sg_entries = cmd_sg_entries;
95 }
96
97 + srp_remove_wq = create_workqueue("srp_remove");
98 + if (IS_ERR(srp_remove_wq)) {
99 + ret = PTR_ERR(srp_remove_wq);
100 + goto out;
101 + }
102 +
103 + ret = -ENOMEM;
104 ib_srp_transport_template =
105 srp_attach_transport(&ib_srp_transport_functions);
106 if (!ib_srp_transport_template)
107 - return -ENOMEM;
108 + goto destroy_wq;
109
110 ret = class_register(&srp_class);
111 if (ret) {
112 pr_err("couldn't register class infiniband_srp\n");
113 - srp_release_transport(ib_srp_transport_template);
114 - return ret;
115 + goto release_tr;
116 }
117
118 ib_sa_register_client(&srp_sa_client);
119 @@ -3330,13 +3338,22 @@ static int __init srp_init_module(void)
120 ret = ib_register_client(&srp_client);
121 if (ret) {
122 pr_err("couldn't register IB client\n");
123 - srp_release_transport(ib_srp_transport_template);
124 - ib_sa_unregister_client(&srp_sa_client);
125 - class_unregister(&srp_class);
126 - return ret;
127 + goto unreg_sa;
128 }
129
130 - return 0;
131 +out:
132 + return ret;
133 +
134 +unreg_sa:
135 + ib_sa_unregister_client(&srp_sa_client);
136 + class_unregister(&srp_class);
137 +
138 +release_tr:
139 + srp_release_transport(ib_srp_transport_template);
140 +
141 +destroy_wq:
142 + destroy_workqueue(srp_remove_wq);
143 + goto out;
144 }
145
146 static void __exit srp_cleanup_module(void)
147 @@ -3345,6 +3362,7 @@ static void __exit srp_cleanup_module(vo
148 ib_sa_unregister_client(&srp_sa_client);
149 class_unregister(&srp_class);
150 srp_release_transport(ib_srp_transport_template);
151 + destroy_workqueue(srp_remove_wq);
152 }
153
154 module_init(srp_init_module);