]>
Commit | Line | Data |
---|---|---|
a65d4bac GKH |
1 | From foo@baz Sat Jul 28 10:25:26 CEST 2018 |
2 | From: Alex Williamson <alex.williamson@redhat.com> | |
3 | Date: Tue, 15 May 2018 13:53:55 -0600 | |
4 | Subject: vfio/mdev: Check globally for duplicate devices | |
5 | ||
6 | From: Alex Williamson <alex.williamson@redhat.com> | |
7 | ||
8 | [ Upstream commit 002fe996f67f4f46d8917b14cfb6e4313c20685a ] | |
9 | ||
10 | When we create an mdev device, we check for duplicates against the | |
11 | parent device and return -EEXIST if found, but the mdev device | |
12 | namespace is global since we'll link all devices from the bus. We do | |
13 | catch this later in sysfs_do_create_link_sd() to return -EEXIST, but | |
14 | with it comes a kernel warning and stack trace for trying to create | |
15 | duplicate sysfs links, which makes it an undesirable response. | |
16 | ||
17 | Therefore we should really be looking for duplicates across all mdev | |
18 | parent devices, or as implemented here, against our mdev device list. | |
19 | Using mdev_list to prevent duplicates means that we can remove | |
20 | mdev_parent.lock, but in order not to serialize mdev device creation | |
21 | and removal globally, we add mdev_device.active which allows UUIDs to | |
22 | be reserved such that we can drop the mdev_list_lock before the mdev | |
23 | device is fully in place. | |
24 | ||
25 | Two behavioral notes; first, mdev_parent.lock had the side-effect of | |
26 | serializing mdev create and remove ops per parent device. This was | |
27 | an implementation detail, not an intentional guarantee provided to | |
28 | the mdev vendor drivers. Vendor drivers can trivially provide this | |
29 | serialization internally if necessary. Second, review comments note | |
30 | the new -EAGAIN behavior when the device, and in particular the remove | |
31 | attribute, becomes visible in sysfs. If a remove is triggered prior | |
32 | to completion of mdev_device_create() the user will see a -EAGAIN | |
33 | error. While the errno is different, receiving an error during this | |
34 | period is not, the previous implementation returned -ENODEV for the | |
35 | same condition. Furthermore, the consistency to the user is improved | |
36 | in the case where mdev_device_remove_ops() returns error. Previously | |
37 | concurrent calls to mdev_device_remove() could see the device | |
38 | disappear with -ENODEV and return in the case of error. Now a user | |
39 | would see -EAGAIN while the device is in this transitory state. | |
40 | ||
41 | Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com> | |
42 | Reviewed-by: Cornelia Huck <cohuck@redhat.com> | |
43 | Acked-by: Halil Pasic <pasic@linux.ibm.com> | |
44 | Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com> | |
45 | Signed-off-by: Alex Williamson <alex.williamson@redhat.com> | |
46 | Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> | |
47 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
48 | --- | |
49 | Documentation/vfio-mediated-device.txt | 5 + | |
50 | drivers/vfio/mdev/mdev_core.c | 102 +++++++++++---------------------- | |
51 | drivers/vfio/mdev/mdev_private.h | 2 | |
52 | 3 files changed, 42 insertions(+), 67 deletions(-) | |
53 | ||
54 | --- a/Documentation/vfio-mediated-device.txt | |
55 | +++ b/Documentation/vfio-mediated-device.txt | |
56 | @@ -145,6 +145,11 @@ The functions in the mdev_parent_ops str | |
57 | * create: allocate basic resources in a driver for a mediated device | |
58 | * remove: free resources in a driver when a mediated device is destroyed | |
59 | ||
60 | +(Note that mdev-core provides no implicit serialization of create/remove | |
61 | +callbacks per mdev parent device, per mdev type, or any other categorization. | |
62 | +Vendor drivers are expected to be fully asynchronous in this respect or | |
63 | +provide their own internal resource protection.) | |
64 | + | |
65 | The callbacks in the mdev_parent_ops structure are as follows: | |
66 | ||
67 | * open: open callback of mediated device | |
68 | --- a/drivers/vfio/mdev/mdev_core.c | |
69 | +++ b/drivers/vfio/mdev/mdev_core.c | |
70 | @@ -66,34 +66,6 @@ uuid_le mdev_uuid(struct mdev_device *md | |
71 | } | |
72 | EXPORT_SYMBOL(mdev_uuid); | |
73 | ||
74 | -static int _find_mdev_device(struct device *dev, void *data) | |
75 | -{ | |
76 | - struct mdev_device *mdev; | |
77 | - | |
78 | - if (!dev_is_mdev(dev)) | |
79 | - return 0; | |
80 | - | |
81 | - mdev = to_mdev_device(dev); | |
82 | - | |
83 | - if (uuid_le_cmp(mdev->uuid, *(uuid_le *)data) == 0) | |
84 | - return 1; | |
85 | - | |
86 | - return 0; | |
87 | -} | |
88 | - | |
89 | -static bool mdev_device_exist(struct mdev_parent *parent, uuid_le uuid) | |
90 | -{ | |
91 | - struct device *dev; | |
92 | - | |
93 | - dev = device_find_child(parent->dev, &uuid, _find_mdev_device); | |
94 | - if (dev) { | |
95 | - put_device(dev); | |
96 | - return true; | |
97 | - } | |
98 | - | |
99 | - return false; | |
100 | -} | |
101 | - | |
102 | /* Should be called holding parent_list_lock */ | |
103 | static struct mdev_parent *__find_parent_device(struct device *dev) | |
104 | { | |
105 | @@ -221,7 +193,6 @@ int mdev_register_device(struct device * | |
106 | } | |
107 | ||
108 | kref_init(&parent->ref); | |
109 | - mutex_init(&parent->lock); | |
110 | ||
111 | parent->dev = dev; | |
112 | parent->ops = ops; | |
113 | @@ -297,6 +268,10 @@ static void mdev_device_release(struct d | |
114 | { | |
115 | struct mdev_device *mdev = to_mdev_device(dev); | |
116 | ||
117 | + mutex_lock(&mdev_list_lock); | |
118 | + list_del(&mdev->next); | |
119 | + mutex_unlock(&mdev_list_lock); | |
120 | + | |
121 | dev_dbg(&mdev->dev, "MDEV: destroying\n"); | |
122 | kfree(mdev); | |
123 | } | |
124 | @@ -304,7 +279,7 @@ static void mdev_device_release(struct d | |
125 | int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) | |
126 | { | |
127 | int ret; | |
128 | - struct mdev_device *mdev; | |
129 | + struct mdev_device *mdev, *tmp; | |
130 | struct mdev_parent *parent; | |
131 | struct mdev_type *type = to_mdev_type(kobj); | |
132 | ||
133 | @@ -312,21 +287,28 @@ int mdev_device_create(struct kobject *k | |
134 | if (!parent) | |
135 | return -EINVAL; | |
136 | ||
137 | - mutex_lock(&parent->lock); | |
138 | + mutex_lock(&mdev_list_lock); | |
139 | ||
140 | /* Check for duplicate */ | |
141 | - if (mdev_device_exist(parent, uuid)) { | |
142 | - ret = -EEXIST; | |
143 | - goto create_err; | |
144 | + list_for_each_entry(tmp, &mdev_list, next) { | |
145 | + if (!uuid_le_cmp(tmp->uuid, uuid)) { | |
146 | + mutex_unlock(&mdev_list_lock); | |
147 | + ret = -EEXIST; | |
148 | + goto mdev_fail; | |
149 | + } | |
150 | } | |
151 | ||
152 | mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); | |
153 | if (!mdev) { | |
154 | + mutex_unlock(&mdev_list_lock); | |
155 | ret = -ENOMEM; | |
156 | - goto create_err; | |
157 | + goto mdev_fail; | |
158 | } | |
159 | ||
160 | memcpy(&mdev->uuid, &uuid, sizeof(uuid_le)); | |
161 | + list_add(&mdev->next, &mdev_list); | |
162 | + mutex_unlock(&mdev_list_lock); | |
163 | + | |
164 | mdev->parent = parent; | |
165 | kref_init(&mdev->ref); | |
166 | ||
167 | @@ -338,35 +320,28 @@ int mdev_device_create(struct kobject *k | |
168 | ret = device_register(&mdev->dev); | |
169 | if (ret) { | |
170 | put_device(&mdev->dev); | |
171 | - goto create_err; | |
172 | + goto mdev_fail; | |
173 | } | |
174 | ||
175 | ret = mdev_device_create_ops(kobj, mdev); | |
176 | if (ret) | |
177 | - goto create_failed; | |
178 | + goto create_fail; | |
179 | ||
180 | ret = mdev_create_sysfs_files(&mdev->dev, type); | |
181 | if (ret) { | |
182 | mdev_device_remove_ops(mdev, true); | |
183 | - goto create_failed; | |
184 | + goto create_fail; | |
185 | } | |
186 | ||
187 | mdev->type_kobj = kobj; | |
188 | + mdev->active = true; | |
189 | dev_dbg(&mdev->dev, "MDEV: created\n"); | |
190 | ||
191 | - mutex_unlock(&parent->lock); | |
192 | - | |
193 | - mutex_lock(&mdev_list_lock); | |
194 | - list_add(&mdev->next, &mdev_list); | |
195 | - mutex_unlock(&mdev_list_lock); | |
196 | - | |
197 | - return ret; | |
198 | + return 0; | |
199 | ||
200 | -create_failed: | |
201 | +create_fail: | |
202 | device_unregister(&mdev->dev); | |
203 | - | |
204 | -create_err: | |
205 | - mutex_unlock(&parent->lock); | |
206 | +mdev_fail: | |
207 | mdev_put_parent(parent); | |
208 | return ret; | |
209 | } | |
210 | @@ -377,44 +352,39 @@ int mdev_device_remove(struct device *de | |
211 | struct mdev_parent *parent; | |
212 | struct mdev_type *type; | |
213 | int ret; | |
214 | - bool found = false; | |
215 | ||
216 | mdev = to_mdev_device(dev); | |
217 | ||
218 | mutex_lock(&mdev_list_lock); | |
219 | list_for_each_entry(tmp, &mdev_list, next) { | |
220 | - if (tmp == mdev) { | |
221 | - found = true; | |
222 | + if (tmp == mdev) | |
223 | break; | |
224 | - } | |
225 | } | |
226 | ||
227 | - if (found) | |
228 | - list_del(&mdev->next); | |
229 | + if (tmp != mdev) { | |
230 | + mutex_unlock(&mdev_list_lock); | |
231 | + return -ENODEV; | |
232 | + } | |
233 | ||
234 | - mutex_unlock(&mdev_list_lock); | |
235 | + if (!mdev->active) { | |
236 | + mutex_unlock(&mdev_list_lock); | |
237 | + return -EAGAIN; | |
238 | + } | |
239 | ||
240 | - if (!found) | |
241 | - return -ENODEV; | |
242 | + mdev->active = false; | |
243 | + mutex_unlock(&mdev_list_lock); | |
244 | ||
245 | type = to_mdev_type(mdev->type_kobj); | |
246 | parent = mdev->parent; | |
247 | - mutex_lock(&parent->lock); | |
248 | ||
249 | ret = mdev_device_remove_ops(mdev, force_remove); | |
250 | if (ret) { | |
251 | - mutex_unlock(&parent->lock); | |
252 | - | |
253 | - mutex_lock(&mdev_list_lock); | |
254 | - list_add(&mdev->next, &mdev_list); | |
255 | - mutex_unlock(&mdev_list_lock); | |
256 | - | |
257 | + mdev->active = true; | |
258 | return ret; | |
259 | } | |
260 | ||
261 | mdev_remove_sysfs_files(dev, type); | |
262 | device_unregister(dev); | |
263 | - mutex_unlock(&parent->lock); | |
264 | mdev_put_parent(parent); | |
265 | ||
266 | return 0; | |
267 | --- a/drivers/vfio/mdev/mdev_private.h | |
268 | +++ b/drivers/vfio/mdev/mdev_private.h | |
269 | @@ -20,7 +20,6 @@ struct mdev_parent { | |
270 | struct device *dev; | |
271 | const struct mdev_parent_ops *ops; | |
272 | struct kref ref; | |
273 | - struct mutex lock; | |
274 | struct list_head next; | |
275 | struct kset *mdev_types_kset; | |
276 | struct list_head type_list; | |
277 | @@ -34,6 +33,7 @@ struct mdev_device { | |
278 | struct kref ref; | |
279 | struct list_head next; | |
280 | struct kobject *type_kobj; | |
281 | + bool active; | |
282 | }; | |
283 | ||
284 | #define to_mdev_device(dev) container_of(dev, struct mdev_device, dev) |