]>
Commit | Line | Data |
---|---|---|
00e5a55c BS |
1 | From: Gerald Schaefer <geraldsc@de.ibm.com> |
2 | Subject: cio: Crashes when repeatedly attaching/detaching devices. | |
3 | References: bnc#458339 | |
4 | ||
5 | Symptom: Oops in dmesg after attaching/detaching a device, subsequent | |
6 | calls to lscss hang. | |
7 | Problem: Incorrect reference counting of subchannel in relation to | |
8 | ccw devices, missing check for delayed registering of ccw | |
9 | devices and incorrectly failing the probe function for I/O | |
10 | subchannels (which leads to unbound subchannels that can't | |
11 | be unregistered). | |
12 | Solution: Make sure that the ccw device holds a reference of the | |
13 | subchannel, that it is not registered if the subchannel is | |
14 | not registered anymore, and schedule unregistering an I/O | |
15 | subchannel if probing encounters an error. | |
16 | ||
17 | Acked-by: John Jolly <jjolly@suse.de> | |
18 | --- | |
19 | drivers/s390/cio/cio.h | 1 | |
20 | drivers/s390/cio/device.c | 128 ++++++++++++++++++++++++++++++++++------------ | |
21 | 2 files changed, 98 insertions(+), 31 deletions(-) | |
22 | ||
23 | --- linux-sles11.orig/drivers/s390/cio/device.c | |
24 | +++ linux-sles11/drivers/s390/cio/device.c | |
25 | @@ -716,6 +716,8 @@ ccw_device_release(struct device *dev) | |
26 | struct ccw_device *cdev; | |
27 | ||
28 | cdev = to_ccwdev(dev); | |
29 | + /* Release reference of parent subchannel. */ | |
30 | + put_device(cdev->dev.parent); | |
31 | kfree(cdev->private); | |
32 | kfree(cdev); | |
33 | } | |
34 | @@ -790,37 +792,55 @@ static void sch_attach_disconnected_devi | |
35 | struct subchannel *other_sch; | |
36 | int ret; | |
37 | ||
38 | - other_sch = to_subchannel(get_device(cdev->dev.parent)); | |
39 | + /* Get reference for new parent. */ | |
40 | + if (!get_device(&sch->dev)) | |
41 | + return; | |
42 | + other_sch = to_subchannel(cdev->dev.parent); | |
43 | + /* Note: device_move() changes cdev->dev.parent */ | |
44 | ret = device_move(&cdev->dev, &sch->dev); | |
45 | if (ret) { | |
46 | CIO_MSG_EVENT(0, "Moving disconnected device 0.%x.%04x failed " | |
47 | "(ret=%d)!\n", cdev->private->dev_id.ssid, | |
48 | cdev->private->dev_id.devno, ret); | |
49 | - put_device(&other_sch->dev); | |
50 | + /* Put reference for new parent. */ | |
51 | + put_device(&sch->dev); | |
52 | return; | |
53 | } | |
54 | sch_set_cdev(other_sch, NULL); | |
55 | /* No need to keep a subchannel without ccw device around. */ | |
56 | css_sch_device_unregister(other_sch); | |
57 | - put_device(&other_sch->dev); | |
58 | sch_attach_device(sch, cdev); | |
59 | + /* Put reference for old parent. */ | |
60 | + put_device(&other_sch->dev); | |
61 | } | |
62 | ||
63 | static void sch_attach_orphaned_device(struct subchannel *sch, | |
64 | struct ccw_device *cdev) | |
65 | { | |
66 | int ret; | |
67 | + struct subchannel *pseudo_sch; | |
68 | ||
69 | - /* Try to move the ccw device to its new subchannel. */ | |
70 | + /* Get reference for new parent. */ | |
71 | + if (!get_device(&sch->dev)) | |
72 | + return; | |
73 | + pseudo_sch = to_subchannel(cdev->dev.parent); | |
74 | + /* | |
75 | + * Try to move the ccw device to its new subchannel. | |
76 | + * Note: device_move() changes cdev->dev.parent | |
77 | + */ | |
78 | ret = device_move(&cdev->dev, &sch->dev); | |
79 | if (ret) { | |
80 | CIO_MSG_EVENT(0, "Moving device 0.%x.%04x from orphanage " | |
81 | "failed (ret=%d)!\n", | |
82 | cdev->private->dev_id.ssid, | |
83 | cdev->private->dev_id.devno, ret); | |
84 | + /* Put reference for new parent. */ | |
85 | + put_device(&sch->dev); | |
86 | return; | |
87 | } | |
88 | sch_attach_device(sch, cdev); | |
89 | + /* Put reference on pseudo subchannel. */ | |
90 | + put_device(&pseudo_sch->dev); | |
91 | } | |
92 | ||
93 | static void sch_create_and_recog_new_device(struct subchannel *sch) | |
94 | @@ -842,9 +862,11 @@ static void sch_create_and_recog_new_dev | |
95 | spin_lock_irq(sch->lock); | |
96 | sch_set_cdev(sch, NULL); | |
97 | spin_unlock_irq(sch->lock); | |
98 | - if (cdev->dev.release) | |
99 | - cdev->dev.release(&cdev->dev); | |
100 | css_sch_device_unregister(sch); | |
101 | + /* Put reference from io_subchannel_create_ccwdev(). */ | |
102 | + put_device(&sch->dev); | |
103 | + /* Give up initial reference. */ | |
104 | + put_device(&cdev->dev); | |
105 | } | |
106 | } | |
107 | ||
108 | @@ -866,15 +888,20 @@ void ccw_device_move_to_orphanage(struct | |
109 | dev_id.devno = sch->schib.pmcw.dev; | |
110 | dev_id.ssid = sch->schid.ssid; | |
111 | ||
112 | + /* Increase refcount for pseudo subchannel. */ | |
113 | + get_device(&css->pseudo_subchannel->dev); | |
114 | /* | |
115 | * Move the orphaned ccw device to the orphanage so the replacing | |
116 | * ccw device can take its place on the subchannel. | |
117 | + * Note: device_move() changes cdev->dev.parent | |
118 | */ | |
119 | ret = device_move(&cdev->dev, &css->pseudo_subchannel->dev); | |
120 | if (ret) { | |
121 | CIO_MSG_EVENT(0, "Moving device 0.%x.%04x to orphanage failed " | |
122 | "(ret=%d)!\n", cdev->private->dev_id.ssid, | |
123 | cdev->private->dev_id.devno, ret); | |
124 | + /* Decrease refcount for pseudo subchannel again. */ | |
125 | + put_device(&css->pseudo_subchannel->dev); | |
126 | return; | |
127 | } | |
128 | cdev->ccwlock = css->pseudo_subchannel->lock; | |
129 | @@ -886,14 +913,20 @@ void ccw_device_move_to_orphanage(struct | |
130 | replacing_cdev = get_disc_ccwdev_by_dev_id(&dev_id, cdev); | |
131 | if (replacing_cdev) { | |
132 | sch_attach_disconnected_device(sch, replacing_cdev); | |
133 | + /* Release reference of subchannel from old cdev. */ | |
134 | + put_device(&sch->dev); | |
135 | return; | |
136 | } | |
137 | replacing_cdev = get_orphaned_ccwdev_by_dev_id(css, &dev_id); | |
138 | if (replacing_cdev) { | |
139 | sch_attach_orphaned_device(sch, replacing_cdev); | |
140 | + /* Release reference of subchannel from old cdev. */ | |
141 | + put_device(&sch->dev); | |
142 | return; | |
143 | } | |
144 | sch_create_and_recog_new_device(sch); | |
145 | + /* Release reference of subchannel from old cdev. */ | |
146 | + put_device(&sch->dev); | |
147 | } | |
148 | ||
149 | /* | |
150 | @@ -911,6 +944,14 @@ io_subchannel_register(struct work_struc | |
151 | priv = container_of(work, struct ccw_device_private, kick_work); | |
152 | cdev = priv->cdev; | |
153 | sch = to_subchannel(cdev->dev.parent); | |
154 | + /* | |
155 | + * Check if subchannel is still registered. It may have become | |
156 | + * unregistered if a machine check hit us after finishing | |
157 | + * device recognition but before the register work could be | |
158 | + * queued. | |
159 | + */ | |
160 | + if (!device_is_registered(&sch->dev)) | |
161 | + goto out_err; | |
162 | css_update_ssd_info(sch); | |
163 | /* | |
164 | * io_subchannel_register() will also be called after device | |
165 | @@ -942,22 +983,19 @@ io_subchannel_register(struct work_struc | |
166 | CIO_MSG_EVENT(0, "Could not register ccw dev 0.%x.%04x: %d\n", | |
167 | cdev->private->dev_id.ssid, | |
168 | cdev->private->dev_id.devno, ret); | |
169 | - put_device(&cdev->dev); | |
170 | spin_lock_irqsave(sch->lock, flags); | |
171 | sch_set_cdev(sch, NULL); | |
172 | spin_unlock_irqrestore(sch->lock, flags); | |
173 | - kfree (cdev->private); | |
174 | - kfree (cdev); | |
175 | - put_device(&sch->dev); | |
176 | - if (atomic_dec_and_test(&ccw_device_init_count)) | |
177 | - wake_up(&ccw_device_init_wq); | |
178 | - return; | |
179 | + /* Release initial device reference. */ | |
180 | + put_device(&cdev->dev); | |
181 | + goto out_err; | |
182 | } | |
183 | - put_device(&cdev->dev); | |
184 | out: | |
185 | cdev->private->flags.recog_done = 1; | |
186 | - put_device(&sch->dev); | |
187 | wake_up(&cdev->private->wait_q); | |
188 | +out_err: | |
189 | + /* Release reference for workqueue processing. */ | |
190 | + put_device(&cdev->dev); | |
191 | if (atomic_dec_and_test(&ccw_device_init_count)) | |
192 | wake_up(&ccw_device_init_wq); | |
193 | } | |
194 | @@ -1068,10 +1106,15 @@ static void ccw_device_move_to_sch(struc | |
195 | priv = container_of(work, struct ccw_device_private, kick_work); | |
196 | sch = priv->sch; | |
197 | cdev = priv->cdev; | |
198 | - former_parent = ccw_device_is_orphan(cdev) ? | |
199 | - NULL : to_subchannel(get_device(cdev->dev.parent)); | |
200 | + former_parent = to_subchannel(cdev->dev.parent); | |
201 | + /* Get reference for new parent. */ | |
202 | + if (!get_device(&sch->dev)) | |
203 | + return; | |
204 | mutex_lock(&sch->reg_mutex); | |
205 | - /* Try to move the ccw device to its new subchannel. */ | |
206 | + /* | |
207 | + * Try to move the ccw device to its new subchannel. | |
208 | + * Note: device_move() changes cdev->dev.parent | |
209 | + */ | |
210 | rc = device_move(&cdev->dev, &sch->dev); | |
211 | mutex_unlock(&sch->reg_mutex); | |
212 | if (rc) { | |
213 | @@ -1081,9 +1124,11 @@ static void ccw_device_move_to_sch(struc | |
214 | cdev->private->dev_id.devno, sch->schid.ssid, | |
215 | sch->schid.sch_no, rc); | |
216 | css_sch_device_unregister(sch); | |
217 | + /* Put reference for new parent again. */ | |
218 | + put_device(&sch->dev); | |
219 | goto out; | |
220 | } | |
221 | - if (former_parent) { | |
222 | + if (!sch_is_pseudo_sch(former_parent)) { | |
223 | spin_lock_irq(former_parent->lock); | |
224 | sch_set_cdev(former_parent, NULL); | |
225 | spin_unlock_irq(former_parent->lock); | |
226 | @@ -1094,8 +1139,8 @@ static void ccw_device_move_to_sch(struc | |
227 | } | |
228 | sch_attach_device(sch, cdev); | |
229 | out: | |
230 | - if (former_parent) | |
231 | - put_device(&former_parent->dev); | |
232 | + /* Put reference for old parent. */ | |
233 | + put_device(&former_parent->dev); | |
234 | put_device(&cdev->dev); | |
235 | } | |
236 | ||
237 | @@ -1137,6 +1182,30 @@ static void io_subchannel_init_fields(st | |
238 | sch->schib.mba = 0; | |
239 | } | |
240 | ||
241 | +static void io_subchannel_do_unreg(struct work_struct *work) | |
242 | +{ | |
243 | + struct subchannel *sch; | |
244 | + | |
245 | + sch = container_of(work, struct subchannel, work); | |
246 | + css_sch_device_unregister(sch); | |
247 | + /* Reset intparm to zeroes. */ | |
248 | + sch->schib.pmcw.intparm = 0; | |
249 | + cio_modify(sch); | |
250 | + put_device(&sch->dev); | |
251 | +} | |
252 | + | |
253 | +/* Schedule unregister if we have no cdev. */ | |
254 | +static void io_subchannel_schedule_removal(struct subchannel *sch) | |
255 | +{ | |
256 | + get_device(&sch->dev); | |
257 | + INIT_WORK(&sch->work, io_subchannel_do_unreg); | |
258 | + queue_work(slow_path_wq, &sch->work); | |
259 | +} | |
260 | + | |
261 | +/* | |
262 | + * Note: We always return 0 so that we bind to the device even on error. | |
263 | + * This is needed so that our remove function is called on unregister. | |
264 | + */ | |
265 | static int io_subchannel_probe(struct subchannel *sch) | |
266 | { | |
267 | struct ccw_device *cdev; | |
268 | @@ -1186,14 +1255,12 @@ static int io_subchannel_probe(struct su | |
269 | rc = sysfs_create_group(&sch->dev.kobj, | |
270 | &io_subchannel_attr_group); | |
271 | if (rc) | |
272 | - return rc; | |
273 | + goto out_schedule; | |
274 | /* Allocate I/O subchannel private data. */ | |
275 | sch->private = kzalloc(sizeof(struct io_subchannel_private), | |
276 | GFP_KERNEL | GFP_DMA); | |
277 | - if (!sch->private) { | |
278 | - rc = -ENOMEM; | |
279 | + if (!sch->private) | |
280 | goto out_err; | |
281 | - } | |
282 | cdev = get_disc_ccwdev_by_dev_id(&dev_id, NULL); | |
283 | if (!cdev) | |
284 | cdev = get_orphaned_ccwdev_by_dev_id(to_css(sch->dev.parent), | |
285 | @@ -1211,24 +1278,23 @@ static int io_subchannel_probe(struct su | |
286 | return 0; | |
287 | } | |
288 | cdev = io_subchannel_create_ccwdev(sch); | |
289 | - if (IS_ERR(cdev)) { | |
290 | - rc = PTR_ERR(cdev); | |
291 | + if (IS_ERR(cdev)) | |
292 | goto out_err; | |
293 | - } | |
294 | rc = io_subchannel_recog(cdev, sch); | |
295 | if (rc) { | |
296 | spin_lock_irqsave(sch->lock, flags); | |
297 | sch_set_cdev(sch, NULL); | |
298 | + io_subchannel_recog_done(cdev); | |
299 | spin_unlock_irqrestore(sch->lock, flags); | |
300 | - if (cdev->dev.release) | |
301 | - cdev->dev.release(&cdev->dev); | |
302 | - goto out_err; | |
303 | } | |
304 | return 0; | |
305 | out_err: | |
306 | kfree(sch->private); | |
307 | sysfs_remove_group(&sch->dev.kobj, &io_subchannel_attr_group); | |
308 | return rc; | |
309 | +out_schedule: | |
310 | + io_subchannel_schedule_removal(sch); | |
311 | + return 0; | |
312 | } | |
313 | ||
314 | static int | |
315 | --- linux-sles11.orig/drivers/s390/cio/cio.h | |
316 | +++ linux-sles11/drivers/s390/cio/cio.h | |
317 | @@ -82,6 +82,7 @@ struct subchannel { | |
318 | struct device dev; /* entry in device tree */ | |
319 | struct css_driver *driver; | |
320 | void *private; /* private per subchannel type data */ | |
321 | + struct work_struct work; | |
322 | } __attribute__ ((aligned(8))); | |
323 | ||
324 | #define IO_INTERRUPT_TYPE 0 /* I/O interrupt type */ |