1 From foo@baz Sat Jul 28 10:25:26 CEST 2018
2 From: Alex Williamson <alex.williamson@redhat.com>
3 Date: Tue, 15 May 2018 13:53:55 -0600
4 Subject: vfio/mdev: Check globally for duplicate devices
6 From: Alex Williamson <alex.williamson@redhat.com>
8 [ Upstream commit 002fe996f67f4f46d8917b14cfb6e4313c20685a ]
10 When we create an mdev device, we check for duplicates against the
11 parent device and return -EEXIST if found, but the mdev device
12 namespace is global since we'll link all devices from the bus. We do
13 catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
14 with it comes a kernel warning and stack trace for trying to create
15 duplicate sysfs links, which makes it an undesirable response.
17 Therefore we should really be looking for duplicates across all mdev
18 parent devices, or as implemented here, against our mdev device list.
19 Using mdev_list to prevent duplicates means that we can remove
20 mdev_parent.lock, but in order not to serialize mdev device creation
21 and removal globally, we add mdev_device.active which allows UUIDs to
22 be reserved such that we can drop the mdev_list_lock before the mdev
23 device is fully in place.
25 Two behavioral notes; first, mdev_parent.lock had the side-effect of
26 serializing mdev create and remove ops per parent device. This was
27 an implementation detail, not an intentional guarantee provided to
28 the mdev vendor drivers. Vendor drivers can trivially provide this
29 serialization internally if necessary. Second, review comments note
30 the new -EAGAIN behavior when the device, and in particular the remove
31 attribute, becomes visible in sysfs. If a remove is triggered prior
32 to completion of mdev_device_create() the user will see a -EAGAIN
33 error. While the errno is different, receiving an error during this
34 period is not, the previous implementation returned -ENODEV for the
35 same condition. Furthermore, the consistency to the user is improved
36 in the case where mdev_device_remove_ops() returns error. Previously
37 concurrent calls to mdev_device_remove() could see the device
38 disappear with -ENODEV and return in the case of error. Now a user
39 would see -EAGAIN while the device is in this transitory state.
41 Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
42 Reviewed-by: Cornelia Huck <cohuck@redhat.com>
43 Acked-by: Halil Pasic <pasic@linux.ibm.com>
44 Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
45 Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
46 Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
47 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
49 Documentation/vfio-mediated-device.txt | 5 +
50 drivers/vfio/mdev/mdev_core.c | 102 +++++++++++----------------------
51 drivers/vfio/mdev/mdev_private.h | 2
52 3 files changed, 42 insertions(+), 67 deletions(-)
54 --- a/Documentation/vfio-mediated-device.txt
55 +++ b/Documentation/vfio-mediated-device.txt
56 @@ -145,6 +145,11 @@ The functions in the mdev_parent_ops str
57 * create: allocate basic resources in a driver for a mediated device
58 * remove: free resources in a driver when a mediated device is destroyed
60 +(Note that mdev-core provides no implicit serialization of create/remove
61 +callbacks per mdev parent device, per mdev type, or any other categorization.
62 +Vendor drivers are expected to be fully asynchronous in this respect or
63 +provide their own internal resource protection.)
65 The callbacks in the mdev_parent_ops structure are as follows:
67 * open: open callback of mediated device
68 --- a/drivers/vfio/mdev/mdev_core.c
69 +++ b/drivers/vfio/mdev/mdev_core.c
70 @@ -66,34 +66,6 @@ uuid_le mdev_uuid(struct mdev_device *md
72 EXPORT_SYMBOL(mdev_uuid);
74 -static int _find_mdev_device(struct device *dev, void *data)
76 - struct mdev_device *mdev;
78 - if (!dev_is_mdev(dev))
81 - mdev = to_mdev_device(dev);
83 - if (uuid_le_cmp(mdev->uuid, *(uuid_le *)data) == 0)
89 -static bool mdev_device_exist(struct mdev_parent *parent, uuid_le uuid)
93 - dev = device_find_child(parent->dev, &uuid, _find_mdev_device);
102 /* Should be called holding parent_list_lock */
103 static struct mdev_parent *__find_parent_device(struct device *dev)
105 @@ -221,7 +193,6 @@ int mdev_register_device(struct device *
108 kref_init(&parent->ref);
109 - mutex_init(&parent->lock);
113 @@ -297,6 +268,10 @@ static void mdev_device_release(struct d
115 struct mdev_device *mdev = to_mdev_device(dev);
117 + mutex_lock(&mdev_list_lock);
118 + list_del(&mdev->next);
119 + mutex_unlock(&mdev_list_lock);
121 dev_dbg(&mdev->dev, "MDEV: destroying\n");
124 @@ -304,7 +279,7 @@ static void mdev_device_release(struct d
125 int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
128 - struct mdev_device *mdev;
129 + struct mdev_device *mdev, *tmp;
130 struct mdev_parent *parent;
131 struct mdev_type *type = to_mdev_type(kobj);
133 @@ -312,21 +287,28 @@ int mdev_device_create(struct kobject *k
137 - mutex_lock(&parent->lock);
138 + mutex_lock(&mdev_list_lock);
140 /* Check for duplicate */
141 - if (mdev_device_exist(parent, uuid)) {
144 + list_for_each_entry(tmp, &mdev_list, next) {
145 + if (!uuid_le_cmp(tmp->uuid, uuid)) {
146 + mutex_unlock(&mdev_list_lock);
152 mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
154 + mutex_unlock(&mdev_list_lock);
160 memcpy(&mdev->uuid, &uuid, sizeof(uuid_le));
161 + list_add(&mdev->next, &mdev_list);
162 + mutex_unlock(&mdev_list_lock);
164 mdev->parent = parent;
165 kref_init(&mdev->ref);
167 @@ -338,35 +320,28 @@ int mdev_device_create(struct kobject *k
168 ret = device_register(&mdev->dev);
170 put_device(&mdev->dev);
175 ret = mdev_device_create_ops(kobj, mdev);
177 - goto create_failed;
180 ret = mdev_create_sysfs_files(&mdev->dev, type);
182 mdev_device_remove_ops(mdev, true);
183 - goto create_failed;
187 mdev->type_kobj = kobj;
188 + mdev->active = true;
189 dev_dbg(&mdev->dev, "MDEV: created\n");
191 - mutex_unlock(&parent->lock);
193 - mutex_lock(&mdev_list_lock);
194 - list_add(&mdev->next, &mdev_list);
195 - mutex_unlock(&mdev_list_lock);
202 device_unregister(&mdev->dev);
205 - mutex_unlock(&parent->lock);
207 mdev_put_parent(parent);
210 @@ -377,44 +352,39 @@ int mdev_device_remove(struct device *de
211 struct mdev_parent *parent;
212 struct mdev_type *type;
214 - bool found = false;
216 mdev = to_mdev_device(dev);
218 mutex_lock(&mdev_list_lock);
219 list_for_each_entry(tmp, &mdev_list, next) {
228 - list_del(&mdev->next);
230 + mutex_unlock(&mdev_list_lock);
234 - mutex_unlock(&mdev_list_lock);
235 + if (!mdev->active) {
236 + mutex_unlock(&mdev_list_lock);
242 + mdev->active = false;
243 + mutex_unlock(&mdev_list_lock);
245 type = to_mdev_type(mdev->type_kobj);
246 parent = mdev->parent;
247 - mutex_lock(&parent->lock);
249 ret = mdev_device_remove_ops(mdev, force_remove);
251 - mutex_unlock(&parent->lock);
253 - mutex_lock(&mdev_list_lock);
254 - list_add(&mdev->next, &mdev_list);
255 - mutex_unlock(&mdev_list_lock);
257 + mdev->active = true;
261 mdev_remove_sysfs_files(dev, type);
262 device_unregister(dev);
263 - mutex_unlock(&parent->lock);
264 mdev_put_parent(parent);
267 --- a/drivers/vfio/mdev/mdev_private.h
268 +++ b/drivers/vfio/mdev/mdev_private.h
269 @@ -20,7 +20,6 @@ struct mdev_parent {
271 const struct mdev_parent_ops *ops;
274 struct list_head next;
275 struct kset *mdev_types_kset;
276 struct list_head type_list;
277 @@ -34,6 +33,7 @@ struct mdev_device {
279 struct list_head next;
280 struct kobject *type_kobj;
284 #define to_mdev_device(dev) container_of(dev, struct mdev_device, dev)