1 From: Gerald Schaefer <geraldsc@de.ibm.com>
2 Subject: cio: Crashes when repeatedly attaching/detaching devices.
5 Symptom: Oops in dmesg after attaching/detaching a device, subsequent
7 Problem: Incorrect reference counting of subchannel in relation to
8 ccw devices, missing check for delayed registering of ccw
9 devices and incorrectly failing the probe function for I/O
10 subchannels (which leads to unbound subchannels that can't
12 Solution: Make sure that the ccw device holds a reference to the
13 subchannel, that it is not registered if the subchannel is
14 not registered anymore, and schedule unregistering an I/O
15 subchannel if probing encounters an error.
17 Acked-by: John Jolly <jjolly@suse.de>
19 drivers/s390/cio/cio.h | 1
20 drivers/s390/cio/device.c | 128 ++++++++++++++++++++++++++++++++++------------
21 2 files changed, 98 insertions(+), 31 deletions(-)
23 --- linux-sles11.orig/drivers/s390/cio/device.c
24 +++ linux-sles11/drivers/s390/cio/device.c
25 @@ -716,6 +716,8 @@ ccw_device_release(struct device *dev)
26 struct ccw_device *cdev;
28 cdev = to_ccwdev(dev);
29 + /* Release reference of parent subchannel. */
30 + put_device(cdev->dev.parent);
34 @@ -790,37 +792,55 @@ static void sch_attach_disconnected_devi
35 struct subchannel *other_sch;
38 - other_sch = to_subchannel(get_device(cdev->dev.parent));
39 + /* Get reference for new parent. */
40 + if (!get_device(&sch->dev))
42 + other_sch = to_subchannel(cdev->dev.parent);
43 + /* Note: device_move() changes cdev->dev.parent */
44 ret = device_move(&cdev->dev, &sch->dev);
46 CIO_MSG_EVENT(0, "Moving disconnected device 0.%x.%04x failed "
47 "(ret=%d)!\n", cdev->private->dev_id.ssid,
48 cdev->private->dev_id.devno, ret);
49 - put_device(&other_sch->dev);
50 + /* Put reference for new parent. */
51 + put_device(&sch->dev);
54 sch_set_cdev(other_sch, NULL);
55 /* No need to keep a subchannel without ccw device around. */
56 css_sch_device_unregister(other_sch);
57 - put_device(&other_sch->dev);
58 sch_attach_device(sch, cdev);
59 + /* Put reference for old parent. */
60 + put_device(&other_sch->dev);
63 static void sch_attach_orphaned_device(struct subchannel *sch,
64 struct ccw_device *cdev)
67 + struct subchannel *pseudo_sch;
69 - /* Try to move the ccw device to its new subchannel. */
70 + /* Get reference for new parent. */
71 + if (!get_device(&sch->dev))
73 + pseudo_sch = to_subchannel(cdev->dev.parent);
75 + * Try to move the ccw device to its new subchannel.
76 + * Note: device_move() changes cdev->dev.parent
78 ret = device_move(&cdev->dev, &sch->dev);
80 CIO_MSG_EVENT(0, "Moving device 0.%x.%04x from orphanage "
82 cdev->private->dev_id.ssid,
83 cdev->private->dev_id.devno, ret);
84 + /* Put reference for new parent. */
85 + put_device(&sch->dev);
88 sch_attach_device(sch, cdev);
89 + /* Put reference on pseudo subchannel. */
90 + put_device(&pseudo_sch->dev);
93 static void sch_create_and_recog_new_device(struct subchannel *sch)
94 @@ -842,9 +862,11 @@ static void sch_create_and_recog_new_dev
95 spin_lock_irq(sch->lock);
96 sch_set_cdev(sch, NULL);
97 spin_unlock_irq(sch->lock);
98 - if (cdev->dev.release)
99 - cdev->dev.release(&cdev->dev);
100 css_sch_device_unregister(sch);
101 + /* Put reference from io_subchannel_create_ccwdev(). */
102 + put_device(&sch->dev);
103 + /* Give up initial reference. */
104 + put_device(&cdev->dev);
108 @@ -866,15 +888,20 @@ void ccw_device_move_to_orphanage(struct
109 dev_id.devno = sch->schib.pmcw.dev;
110 dev_id.ssid = sch->schid.ssid;
112 + /* Increase refcount for pseudo subchannel. */
113 + get_device(&css->pseudo_subchannel->dev);
115 * Move the orphaned ccw device to the orphanage so the replacing
116 * ccw device can take its place on the subchannel.
117 + * Note: device_move() changes cdev->dev.parent
119 ret = device_move(&cdev->dev, &css->pseudo_subchannel->dev);
121 CIO_MSG_EVENT(0, "Moving device 0.%x.%04x to orphanage failed "
122 "(ret=%d)!\n", cdev->private->dev_id.ssid,
123 cdev->private->dev_id.devno, ret);
124 + /* Decrease refcount for pseudo subchannel again. */
125 + put_device(&css->pseudo_subchannel->dev);
128 cdev->ccwlock = css->pseudo_subchannel->lock;
129 @@ -886,14 +913,20 @@ void ccw_device_move_to_orphanage(struct
130 replacing_cdev = get_disc_ccwdev_by_dev_id(&dev_id, cdev);
131 if (replacing_cdev) {
132 sch_attach_disconnected_device(sch, replacing_cdev);
133 + /* Release reference of subchannel from old cdev. */
134 + put_device(&sch->dev);
137 replacing_cdev = get_orphaned_ccwdev_by_dev_id(css, &dev_id);
138 if (replacing_cdev) {
139 sch_attach_orphaned_device(sch, replacing_cdev);
140 + /* Release reference of subchannel from old cdev. */
141 + put_device(&sch->dev);
144 sch_create_and_recog_new_device(sch);
145 + /* Release reference of subchannel from old cdev. */
146 + put_device(&sch->dev);
150 @@ -911,6 +944,14 @@ io_subchannel_register(struct work_struc
151 priv = container_of(work, struct ccw_device_private, kick_work);
153 sch = to_subchannel(cdev->dev.parent);
155 + * Check if subchannel is still registered. It may have become
156 + * unregistered if a machine check hit us after finishing
157 + * device recognition but before the register work could be
160 + if (!device_is_registered(&sch->dev))
162 css_update_ssd_info(sch);
164 * io_subchannel_register() will also be called after device
165 @@ -942,22 +983,19 @@ io_subchannel_register(struct work_struc
166 CIO_MSG_EVENT(0, "Could not register ccw dev 0.%x.%04x: %d\n",
167 cdev->private->dev_id.ssid,
168 cdev->private->dev_id.devno, ret);
169 - put_device(&cdev->dev);
170 spin_lock_irqsave(sch->lock, flags);
171 sch_set_cdev(sch, NULL);
172 spin_unlock_irqrestore(sch->lock, flags);
173 - kfree (cdev->private);
175 - put_device(&sch->dev);
176 - if (atomic_dec_and_test(&ccw_device_init_count))
177 - wake_up(&ccw_device_init_wq);
179 + /* Release initial device reference. */
180 + put_device(&cdev->dev);
183 - put_device(&cdev->dev);
185 cdev->private->flags.recog_done = 1;
186 - put_device(&sch->dev);
187 wake_up(&cdev->private->wait_q);
189 + /* Release reference for workqueue processing. */
190 + put_device(&cdev->dev);
191 if (atomic_dec_and_test(&ccw_device_init_count))
192 wake_up(&ccw_device_init_wq);
194 @@ -1068,10 +1106,15 @@ static void ccw_device_move_to_sch(struc
195 priv = container_of(work, struct ccw_device_private, kick_work);
198 - former_parent = ccw_device_is_orphan(cdev) ?
199 - NULL : to_subchannel(get_device(cdev->dev.parent));
200 + former_parent = to_subchannel(cdev->dev.parent);
201 + /* Get reference for new parent. */
202 + if (!get_device(&sch->dev))
204 mutex_lock(&sch->reg_mutex);
205 - /* Try to move the ccw device to its new subchannel. */
207 + * Try to move the ccw device to its new subchannel.
208 + * Note: device_move() changes cdev->dev.parent
210 rc = device_move(&cdev->dev, &sch->dev);
211 mutex_unlock(&sch->reg_mutex);
213 @@ -1081,9 +1124,11 @@ static void ccw_device_move_to_sch(struc
214 cdev->private->dev_id.devno, sch->schid.ssid,
215 sch->schid.sch_no, rc);
216 css_sch_device_unregister(sch);
217 + /* Put reference for new parent again. */
218 + put_device(&sch->dev);
221 - if (former_parent) {
222 + if (!sch_is_pseudo_sch(former_parent)) {
223 spin_lock_irq(former_parent->lock);
224 sch_set_cdev(former_parent, NULL);
225 spin_unlock_irq(former_parent->lock);
226 @@ -1094,8 +1139,8 @@ static void ccw_device_move_to_sch(struc
228 sch_attach_device(sch, cdev);
231 - put_device(&former_parent->dev);
232 + /* Put reference for old parent. */
233 + put_device(&former_parent->dev);
234 put_device(&cdev->dev);
237 @@ -1137,6 +1182,30 @@ static void io_subchannel_init_fields(st
241 +static void io_subchannel_do_unreg(struct work_struct *work)
243 + struct subchannel *sch;
245 + sch = container_of(work, struct subchannel, work);
246 + css_sch_device_unregister(sch);
247 + /* Reset intparm to zeroes. */
248 + sch->schib.pmcw.intparm = 0;
250 + put_device(&sch->dev);
253 +/* Schedule unregister if we have no cdev. */
254 +static void io_subchannel_schedule_removal(struct subchannel *sch)
256 + get_device(&sch->dev);
257 + INIT_WORK(&sch->work, io_subchannel_do_unreg);
258 + queue_work(slow_path_wq, &sch->work);
262 + * Note: We always return 0 so that we bind to the device even on error.
263 + * This is needed so that our remove function is called on unregister.
265 static int io_subchannel_probe(struct subchannel *sch)
267 struct ccw_device *cdev;
268 @@ -1186,14 +1255,12 @@ static int io_subchannel_probe(struct su
269 rc = sysfs_create_group(&sch->dev.kobj,
270 &io_subchannel_attr_group);
274 /* Allocate I/O subchannel private data. */
275 sch->private = kzalloc(sizeof(struct io_subchannel_private),
276 GFP_KERNEL | GFP_DMA);
277 - if (!sch->private) {
282 cdev = get_disc_ccwdev_by_dev_id(&dev_id, NULL);
284 cdev = get_orphaned_ccwdev_by_dev_id(to_css(sch->dev.parent),
285 @@ -1211,24 +1278,23 @@ static int io_subchannel_probe(struct su
288 cdev = io_subchannel_create_ccwdev(sch);
289 - if (IS_ERR(cdev)) {
290 - rc = PTR_ERR(cdev);
294 rc = io_subchannel_recog(cdev, sch);
296 spin_lock_irqsave(sch->lock, flags);
297 sch_set_cdev(sch, NULL);
298 + io_subchannel_recog_done(cdev);
299 spin_unlock_irqrestore(sch->lock, flags);
300 - if (cdev->dev.release)
301 - cdev->dev.release(&cdev->dev);
307 sysfs_remove_group(&sch->dev.kobj, &io_subchannel_attr_group);
310 + io_subchannel_schedule_removal(sch);
315 --- linux-sles11.orig/drivers/s390/cio/cio.h
316 +++ linux-sles11/drivers/s390/cio/cio.h
317 @@ -82,6 +82,7 @@ struct subchannel {
318 struct device dev; /* entry in device tree */
319 struct css_driver *driver;
320 void *private; /* private per subchannel type data */
321 + struct work_struct work;
322 } __attribute__ ((aligned(8)));
324 #define IO_INTERRUPT_TYPE 0 /* I/O interrupt type */