]> git.ipfire.org Git - thirdparty/linux.git/blame - drivers/infiniband/core/uverbs_main.c
Merge tag 'mmc-v6.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc
[thirdparty/linux.git] / drivers / infiniband / core / uverbs_main.c
CommitLineData
bc38a6ab
RD
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
33b9b3ee 3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
2a1d9b7f
RD
4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
67cdb40c 6 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
bc38a6ab
RD
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
bc38a6ab
RD
35 */
36
37#include <linux/module.h>
38#include <linux/init.h>
39#include <linux/device.h>
40#include <linux/err.h>
41#include <linux/fs.h>
42#include <linux/poll.h>
a99bbaf5 43#include <linux/sched.h>
bc38a6ab 44#include <linux/file.h>
70a30e16 45#include <linux/cdev.h>
a265e558 46#include <linux/anon_inodes.h>
5a0e3ad6 47#include <linux/slab.h>
5f9794dc 48#include <linux/sched/mm.h>
bc38a6ab 49
7c0f6ba6 50#include <linux/uaccess.h>
bc38a6ab 51
e6bd18f5 52#include <rdma/ib.h>
52427112 53#include <rdma/uverbs_std_types.h>
8f71bb00 54#include <rdma/rdma_netlink.h>
61e51682 55#include <rdma/ib_ucaps.h>
e6bd18f5 56
bc38a6ab 57#include "uverbs.h"
43579b5f 58#include "core_priv.h"
fd3c7904 59#include "rdma_core.h"
bc38a6ab
RD
60
61MODULE_AUTHOR("Roland Dreier");
62MODULE_DESCRIPTION("InfiniBand userspace verbs access");
63MODULE_LICENSE("Dual BSD/GPL");
64
bc38a6ab
RD
/*
 * Character-device numbering constants for uverbs. IB_UVERBS_MAJOR and
 * IB_UVERBS_BASE_MINOR are combined into IB_UVERBS_BASE_DEV; the number of
 * dynamic minors is whatever remains of IB_UVERBS_MAX_DEVICES once the fixed
 * minors are subtracted.
 */
65enum {
66 IB_UVERBS_MAJOR = 231,
67 IB_UVERBS_BASE_MINOR = 192,
8cf12d77
HN
68 IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS,
69 IB_UVERBS_NUM_FIXED_MINOR = 32,
70 IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR,
bc38a6ab
RD
71};
72
/* dev_t built from the fixed uverbs major and the base minor. */
73#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
74
/* NOTE(review): presumably the base dev_t of the dynamic minor range - confirm at the allocation site. */
8cf12d77 75static dev_t dynamic_uverbs_dev;
70a30e16 76
/* NOTE(review): looks like the ID allocator for device numbers - confirm against its users. */
90f6e41c 77static DEFINE_IDA(uverbs_ida);
/* Forward declarations; the definitions are not in this part of the file. */
11a0ae4c 78static int ib_uverbs_add_one(struct ib_device *device);
7c1eb45a 79static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
51976c6c 80static struct ib_client uverbs_client;
bc38a6ab 81
64917f4c
IO
82static char *uverbs_devnode(const struct device *dev, umode_t *mode)
83{
84 if (mode)
85 *mode = 0666;
86 return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
87}
88
89static const struct class uverbs_class = {
90 .name = "infiniband_verbs",
91 .devnode = uverbs_devnode,
92};
93
22fa27fb
JG
94/*
95 * Must be called with the ufile->device->disassociate_srcu held, and the lock
96 * must be held until use of the ucontext is finished.
97 */
8313c10f 98struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile)
7dc08dcf 99{
22fa27fb
JG
100 /*
101 * We do not hold the hw_destroy_rwsem lock for this flow, instead
102 * srcu is used. It does not matter if someone races this with
103 * get_context, we get NULL or valid ucontext.
104 */
105 struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext);
106
107 if (!srcu_dereference(ufile->device->ib_dev,
108 &ufile->device->disassociate_srcu))
109 return ERR_PTR(-EIO);
110
111 if (!ucontext)
112 return ERR_PTR(-EINVAL);
113
114 return ucontext;
7dc08dcf 115}
8313c10f 116EXPORT_SYMBOL(ib_uverbs_get_ucontext_file);
7dc08dcf 117
feb7c1e3
CH
118int uverbs_dealloc_mw(struct ib_mw *mw)
119{
120 struct ib_pd *pd = mw->pd;
121 int ret;
122
3023a1e9 123 ret = mw->device->ops.dealloc_mw(mw);
d18bb3e1
LR
124 if (ret)
125 return ret;
126
127 atomic_dec(&pd->usecnt);
128 kfree(mw);
feb7c1e3
CH
129 return ret;
130}
131
c5c4d92e 132static void ib_uverbs_release_dev(struct device *device)
70a30e16
RD
133{
134 struct ib_uverbs_device *dev =
c5c4d92e 135 container_of(device, struct ib_uverbs_device, dev);
70a30e16 136
9ed3e5f4 137 uverbs_destroy_api(dev->uapi);
036b1063 138 cleanup_srcu_struct(&dev->disassociate_srcu);
56594ae1
PP
139 mutex_destroy(&dev->lists_mutex);
140 mutex_destroy(&dev->xrcd_tree_mutex);
35d4a0b6 141 kfree(dev);
70a30e16
RD
142}
143
5c55cfd6
JG
144void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
145 struct ib_ucq_object *uobj)
70a30e16
RD
146{
147 struct ib_uverbs_event *evt, *tmp;
148
149 if (ev_file) {
db1b5ddd 150 spin_lock_irq(&ev_file->ev_queue.lock);
70a30e16
RD
151 list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
152 list_del(&evt->list);
153 kfree(evt);
154 }
db1b5ddd 155 spin_unlock_irq(&ev_file->ev_queue.lock);
70a30e16 156
d0259e82 157 uverbs_uobject_put(&ev_file->uobj);
70a30e16
RD
158 }
159
5c55cfd6 160 ib_uverbs_release_uevent(&uobj->uevent);
70a30e16
RD
161}
162
5c55cfd6 163void ib_uverbs_release_uevent(struct ib_uevent_object *uobj)
70a30e16 164{
98a8890f 165 struct ib_uverbs_async_event_file *async_file = uobj->event_file;
70a30e16
RD
166 struct ib_uverbs_event *evt, *tmp;
167
a1123418
JG
168 if (!async_file)
169 return;
170
5c55cfd6 171 spin_lock_irq(&async_file->ev_queue.lock);
70a30e16
RD
172 list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
173 list_del(&evt->list);
174 kfree(evt);
175 }
5c55cfd6 176 spin_unlock_irq(&async_file->ev_queue.lock);
98a8890f 177 uverbs_uobject_put(&async_file->uobj);
70a30e16
RD
178}
179
6be60aed
MB
180void ib_uverbs_detach_umcast(struct ib_qp *qp,
181 struct ib_uqp_object *uobj)
f4e40156
JM
182{
183 struct ib_uverbs_mcast_entry *mcast, *tmp;
184
185 list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
186 ib_detach_mcast(qp, &mcast->gid, mcast->lid);
187 list_del(&mcast->list);
188 kfree(mcast);
189 }
190}
191
35d4a0b6
YH
192static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
193{
194 complete(&dev->comp);
195}
196
cf8966b3 197void ib_uverbs_release_file(struct kref *ref)
bc38a6ab
RD
198{
199 struct ib_uverbs_file *file =
200 container_of(ref, struct ib_uverbs_file, ref);
036b1063
YH
201 struct ib_device *ib_dev;
202 int srcu_key;
203
0f50d88a
JG
204 release_ufile_idr_uobject(file);
205
036b1063
YH
206 srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
207 ib_dev = srcu_dereference(file->device->ib_dev,
208 &file->device->disassociate_srcu);
3023a1e9 209 if (ib_dev && !ib_dev->ops.disassociate_ucontext)
7a154142 210 module_put(ib_dev->ops.owner);
036b1063 211 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
bc38a6ab 212
ec9bf373 213 if (refcount_dec_and_test(&file->device->refcount))
35d4a0b6 214 ib_uverbs_comp_dev(file->device);
70a30e16 215
98a8890f
YH
216 if (file->default_async_file)
217 uverbs_uobject_put(&file->default_async_file->uobj);
c5c4d92e 218 put_device(&file->device->dev);
67f269b3
JG
219
220 if (file->disassociate_page)
221 __free_pages(file->disassociate_page, 0);
51976c6c 222 mutex_destroy(&file->disassociation_lock);
56594ae1
PP
223 mutex_destroy(&file->umap_lock);
224 mutex_destroy(&file->ucontext_lock);
bc38a6ab
RD
225 kfree(file);
226}
227
db1b5ddd 228static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
1e7710f3
MB
229 struct file *filp, char __user *buf,
230 size_t count, loff_t *pos,
e0fcc611 231 size_t eventsz)
bc38a6ab 232{
63aaf647 233 struct ib_uverbs_event *event;
bc38a6ab
RD
234 int ret = 0;
235
db1b5ddd 236 spin_lock_irq(&ev_queue->lock);
bc38a6ab 237
db1b5ddd 238 while (list_empty(&ev_queue->event_list)) {
62fab312
YH
239 if (ev_queue->is_closed) {
240 spin_unlock_irq(&ev_queue->lock);
241 return -EIO;
242 }
bc38a6ab 243
62fab312 244 spin_unlock_irq(&ev_queue->lock);
bc38a6ab
RD
245 if (filp->f_flags & O_NONBLOCK)
246 return -EAGAIN;
247
db1b5ddd
MB
248 if (wait_event_interruptible(ev_queue->poll_wait,
249 (!list_empty(&ev_queue->event_list) ||
14e23bd6 250 ev_queue->is_closed)))
bc38a6ab
RD
251 return -ERESTARTSYS;
252
14e23bd6 253 spin_lock_irq(&ev_queue->lock);
bc38a6ab
RD
254 }
255
db1b5ddd 256 event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
63aaf647 257
bc38a6ab
RD
258 if (eventsz > count) {
259 ret = -EINVAL;
260 event = NULL;
63aaf647 261 } else {
db1b5ddd 262 list_del(ev_queue->event_list.next);
63aaf647
RD
263 if (event->counter) {
264 ++(*event->counter);
265 list_del(&event->obj_list);
266 }
267 }
bc38a6ab 268
db1b5ddd 269 spin_unlock_irq(&ev_queue->lock);
bc38a6ab
RD
270
271 if (event) {
272 if (copy_to_user(buf, event, eventsz))
273 ret = -EFAULT;
274 else
275 ret = eventsz;
276 }
277
278 kfree(event);
279
280 return ret;
281}
282
1e7710f3
MB
283static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
284 size_t count, loff_t *pos)
285{
286 struct ib_uverbs_async_event_file *file = filp->private_data;
287
14e23bd6 288 return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos,
e0fcc611 289 sizeof(struct ib_uverbs_async_event_desc));
1e7710f3
MB
290}
291
292static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
293 size_t count, loff_t *pos)
294{
295 struct ib_uverbs_completion_event_file *comp_ev_file =
296 filp->private_data;
297
14e23bd6
JG
298 return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count,
299 pos,
e0fcc611 300 sizeof(struct ib_uverbs_comp_event_desc));
1e7710f3
MB
301}
302
afc9a42b 303static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
1e7710f3 304 struct file *filp,
bc38a6ab
RD
305 struct poll_table_struct *wait)
306{
afc9a42b 307 __poll_t pollflags = 0;
bc38a6ab 308
db1b5ddd 309 poll_wait(filp, &ev_queue->poll_wait, wait);
bc38a6ab 310
db1b5ddd
MB
311 spin_lock_irq(&ev_queue->lock);
312 if (!list_empty(&ev_queue->event_list))
a9a08845 313 pollflags = EPOLLIN | EPOLLRDNORM;
eb356e6d
JG
314 else if (ev_queue->is_closed)
315 pollflags = EPOLLERR;
db1b5ddd 316 spin_unlock_irq(&ev_queue->lock);
bc38a6ab
RD
317
318 return pollflags;
319}
320
afc9a42b 321static __poll_t ib_uverbs_async_event_poll(struct file *filp,
1e7710f3
MB
322 struct poll_table_struct *wait)
323{
14e23bd6
JG
324 struct ib_uverbs_async_event_file *file = filp->private_data;
325
326 return ib_uverbs_event_poll(&file->ev_queue, filp, wait);
1e7710f3
MB
327}
328
afc9a42b 329static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
1e7710f3
MB
330 struct poll_table_struct *wait)
331{
332 struct ib_uverbs_completion_event_file *comp_ev_file =
333 filp->private_data;
334
db1b5ddd 335 return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
1e7710f3
MB
336}
337
338static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
abdf119b 339{
14e23bd6 340 struct ib_uverbs_async_event_file *file = filp->private_data;
abdf119b 341
14e23bd6 342 return fasync_helper(fd, filp, on, &file->ev_queue.async_queue);
abdf119b
GN
343}
344
1e7710f3 345static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
bc38a6ab 346{
1e7710f3
MB
347 struct ib_uverbs_completion_event_file *comp_ev_file =
348 filp->private_data;
349
db1b5ddd 350 return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
1e7710f3
MB
351}
352
1e7710f3 353const struct file_operations uverbs_event_fops = {
6b73597e 354 .owner = THIS_MODULE,
1e7710f3
MB
355 .read = ib_uverbs_comp_event_read,
356 .poll = ib_uverbs_comp_event_poll,
f7c8416c 357 .release = uverbs_uobject_fd_release,
1e7710f3 358 .fasync = ib_uverbs_comp_event_fasync,
1e7710f3
MB
359};
360
3e032c0e 361const struct file_operations uverbs_async_event_fops = {
1e7710f3
MB
362 .owner = THIS_MODULE,
363 .read = ib_uverbs_async_event_read,
364 .poll = ib_uverbs_async_event_poll,
c485b19d 365 .release = uverbs_async_event_release,
1e7710f3 366 .fasync = ib_uverbs_async_event_fasync,
bc38a6ab
RD
367};
368
369void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
370{
db1b5ddd 371 struct ib_uverbs_event_queue *ev_queue = cq_context;
6b73597e
RD
372 struct ib_ucq_object *uobj;
373 struct ib_uverbs_event *entry;
374 unsigned long flags;
375
db1b5ddd 376 if (!ev_queue)
6b73597e
RD
377 return;
378
db1b5ddd
MB
379 spin_lock_irqsave(&ev_queue->lock, flags);
380 if (ev_queue->is_closed) {
381 spin_unlock_irqrestore(&ev_queue->lock, flags);
6b73597e
RD
382 return;
383 }
bc38a6ab 384
08f0e161 385 entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
305a7e87 386 if (!entry) {
db1b5ddd 387 spin_unlock_irqrestore(&ev_queue->lock, flags);
bc38a6ab 388 return;
305a7e87 389 }
bc38a6ab 390
5bd48c18 391 uobj = cq->uobject;
63aaf647 392
5bd48c18 393 entry->desc.comp.cq_handle = cq->uobject->uevent.uobject.user_handle;
63aaf647 394 entry->counter = &uobj->comp_events_reported;
bc38a6ab 395
db1b5ddd 396 list_add_tail(&entry->list, &ev_queue->event_list);
63aaf647 397 list_add_tail(&entry->obj_list, &uobj->comp_list);
db1b5ddd 398 spin_unlock_irqrestore(&ev_queue->lock, flags);
bc38a6ab 399
db1b5ddd
MB
400 wake_up_interruptible(&ev_queue->poll_wait);
401 kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
bc38a6ab
RD
402}
403
ccfdbaa5
JG
404void ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
405 __u64 element, __u64 event,
406 struct list_head *obj_list, u32 *counter)
bc38a6ab 407{
63aaf647 408 struct ib_uverbs_event *entry;
bc38a6ab
RD
409 unsigned long flags;
410
a1123418
JG
411 if (!async_file)
412 return;
413
817d6576
JG
414 spin_lock_irqsave(&async_file->ev_queue.lock, flags);
415 if (async_file->ev_queue.is_closed) {
416 spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
6b73597e
RD
417 return;
418 }
419
08f0e161 420 entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
305a7e87 421 if (!entry) {
817d6576 422 spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
bc38a6ab 423 return;
305a7e87 424 }
bc38a6ab 425
817d6576 426 entry->desc.async.element = element;
63aaf647 427 entry->desc.async.event_type = event;
817d6576
JG
428 entry->desc.async.reserved = 0;
429 entry->counter = counter;
bc38a6ab 430
817d6576 431 list_add_tail(&entry->list, &async_file->ev_queue.event_list);
63aaf647
RD
432 if (obj_list)
433 list_add_tail(&entry->obj_list, obj_list);
817d6576 434 spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
bc38a6ab 435
817d6576
JG
436 wake_up_interruptible(&async_file->ev_queue.poll_wait);
437 kill_fasync(&async_file->ev_queue.async_queue, SIGIO, POLL_IN);
bc38a6ab
RD
438}
439
817d6576
JG
440static void uverbs_uobj_event(struct ib_uevent_object *eobj,
441 struct ib_event *event)
bc38a6ab 442{
98a8890f 443 ib_uverbs_async_handler(eobj->event_file,
817d6576
JG
444 eobj->uobject.user_handle, event->event,
445 &eobj->event_list, &eobj->events_reported);
446}
63aaf647 447
817d6576
JG
448void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
449{
450 uverbs_uobj_event(&event->element.cq->uobject->uevent, event);
bc38a6ab
RD
451}
452
453void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
454{
a040f95d 455 /* for XRC target qp's, check that qp is live */
fd3c7904 456 if (!event->element.qp->uobject)
a040f95d
JM
457 return;
458
817d6576 459 uverbs_uobj_event(&event->element.qp->uobject->uevent, event);
bc38a6ab
RD
460}
461
f213c052
YH
462void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
463{
817d6576 464 uverbs_uobj_event(&event->element.wq->uobject->uevent, event);
f213c052
YH
465}
466
f520ba5a
RD
467void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
468{
817d6576 469 uverbs_uobj_event(&event->element.srq->uobject->uevent, event);
f520ba5a
RD
470}
471
817d6576
JG
472static void ib_uverbs_event_handler(struct ib_event_handler *handler,
473 struct ib_event *event)
bc38a6ab 474{
817d6576 475 ib_uverbs_async_handler(
3e032c0e
JG
476 container_of(handler, struct ib_uverbs_async_event_file,
477 event_handler),
817d6576 478 event->element.port_num, event->event, NULL, NULL);
bc38a6ab
RD
479}
480
db1b5ddd 481void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
bc38a6ab 482{
db1b5ddd
MB
483 spin_lock_init(&ev_queue->lock);
484 INIT_LIST_HEAD(&ev_queue->event_list);
485 init_waitqueue_head(&ev_queue->poll_wait);
486 ev_queue->is_closed = 0;
487 ev_queue->async_queue = NULL;
1e7710f3
MB
488}
489
3e032c0e
JG
490void ib_uverbs_init_async_event_file(
491 struct ib_uverbs_async_event_file *async_file)
1e7710f3 492{
3e032c0e
JG
493 struct ib_uverbs_file *uverbs_file = async_file->uobj.ufile;
494 struct ib_device *ib_dev = async_file->uobj.context->device;
03c40442 495
3e032c0e 496 ib_uverbs_init_event_queue(&async_file->ev_queue);
03c40442 497
d680e88e 498 /* The first async_event_file becomes the default one for the file. */
a1123418 499 mutex_lock(&uverbs_file->ucontext_lock);
98a8890f 500 if (!uverbs_file->default_async_file) {
3e032c0e
JG
501 /* Pairs with the put in ib_uverbs_release_file */
502 uverbs_uobject_get(&async_file->uobj);
98a8890f 503 smp_store_release(&uverbs_file->default_async_file, async_file);
3e032c0e 504 }
a1123418 505 mutex_unlock(&uverbs_file->ucontext_lock);
3e032c0e
JG
506
507 INIT_IB_EVENT_HANDLER(&async_file->event_handler, ib_dev,
508 ib_uverbs_event_handler);
509 ib_register_event_handler(&async_file->event_handler);
6b73597e
RD
510}
511
6284380a 512static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
da0f60df
JG
513 struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count,
514 const struct uverbs_api_write_method *method_elm)
6284380a 515{
da0f60df 516 if (method_elm->is_ex) {
6284380a
LR
517 count -= sizeof(*hdr) + sizeof(*ex_hdr);
518
519 if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
520 return -EINVAL;
521
da0f60df
JG
522 if (hdr->in_words * 8 < method_elm->req_size)
523 return -ENOSPC;
524
6284380a
LR
525 if (ex_hdr->cmd_hdr_reserved)
526 return -EINVAL;
527
528 if (ex_hdr->response) {
529 if (!hdr->out_words && !ex_hdr->provider_out_words)
530 return -EINVAL;
531
da0f60df
JG
532 if (hdr->out_words * 8 < method_elm->resp_size)
533 return -ENOSPC;
534
96d4f267 535 if (!access_ok(u64_to_user_ptr(ex_hdr->response),
6284380a
LR
536 (hdr->out_words + ex_hdr->provider_out_words) * 8))
537 return -EFAULT;
538 } else {
539 if (hdr->out_words || ex_hdr->provider_out_words)
540 return -EINVAL;
541 }
542
543 return 0;
544 }
545
546 /* not extended command */
547 if (hdr->in_words * 4 != count)
548 return -EINVAL;
549
c489800e 550 if (count < method_elm->req_size + sizeof(*hdr)) {
da0f60df
JG
551 /*
552 * rdma-core v18 and v19 have a bug where they send DESTROY_CQ
553 * with a 16 byte write instead of 24. Old kernels didn't
554 * check the size so they allowed this. Now that the size is
555 * checked provide a compatibility work around to not break
556 * those userspaces.
557 */
558 if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ &&
559 count == 16) {
560 hdr->in_words = 6;
561 return 0;
562 }
563 return -ENOSPC;
564 }
565 if (hdr->out_words * 4 < method_elm->resp_size)
566 return -ENOSPC;
567
6284380a
LR
568 return 0;
569}
570
bc38a6ab
RD
571static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
572 size_t count, loff_t *pos)
573{
574 struct ib_uverbs_file *file = filp->private_data;
d120c3c9
JG
575 const struct uverbs_api_write_method *method_elm;
576 struct uverbs_api *uapi = file->device->uapi;
43ae9513 577 struct ib_uverbs_ex_cmd_hdr ex_hdr;
bc38a6ab 578 struct ib_uverbs_cmd_hdr hdr;
8313c10f 579 struct uverbs_attr_bundle bundle;
036b1063
YH
580 int srcu_key;
581 ssize_t ret;
057aec0d 582
f73a1dbc
LR
583 if (!ib_safe_file_access(filp)) {
584 pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
585 task_tgid_vnr(current), current->comm);
e6bd18f5 586 return -EACCES;
f73a1dbc 587 }
e6bd18f5 588
08f0e161 589 if (count < sizeof(hdr))
bc38a6ab
RD
590 return -EINVAL;
591
08f0e161 592 if (copy_from_user(&hdr, buf, sizeof(hdr)))
bc38a6ab
RD
593 return -EFAULT;
594
d120c3c9
JG
595 method_elm = uapi_get_method(uapi, hdr.command);
596 if (IS_ERR(method_elm))
597 return PTR_ERR(method_elm);
77833b8a 598
d120c3c9 599 if (method_elm->is_ex) {
e21719fb
LR
600 if (count < (sizeof(hdr) + sizeof(ex_hdr)))
601 return -EINVAL;
602 if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
603 return -EFAULT;
604 }
77833b8a 605
da0f60df 606 ret = verify_hdr(&hdr, &ex_hdr, count, method_elm);
6284380a
LR
607 if (ret)
608 return ret;
609
036b1063 610 srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
036b1063 611
6284380a 612 buf += sizeof(hdr);
400dbc96 613
d6f4a21f 614 memset(bundle.attr_present, 0, sizeof(bundle.attr_present));
8313c10f 615 bundle.ufile = file;
3d9dfd06 616 bundle.context = NULL; /* only valid if bundle has uobject */
0f63ef1d 617 bundle.uobject = NULL;
d120c3c9 618 if (!method_elm->is_ex) {
3a6532c9
JG
619 size_t in_len = hdr.in_words * 4 - sizeof(hdr);
620 size_t out_len = hdr.out_words * 4;
c2a939fd 621 u64 response = 0;
3a6532c9
JG
622
623 if (method_elm->has_udata) {
624 bundle.driver_udata.inlen =
625 in_len - method_elm->req_size;
626 in_len = method_elm->req_size;
627 if (bundle.driver_udata.inlen)
628 bundle.driver_udata.inbuf = buf + in_len;
629 else
630 bundle.driver_udata.inbuf = NULL;
631 } else {
632 memset(&bundle.driver_udata, 0,
633 sizeof(bundle.driver_udata));
634 }
635
636 if (method_elm->has_resp) {
3a6532c9
JG
637 /*
638 * The macros check that if has_resp is set
639 * then the command request structure starts
640 * with a '__aligned u64 response' member.
641 */
259e66bc 642 ret = get_user(response, (const u64 __user *)buf);
3a6532c9
JG
643 if (ret)
644 goto out_unlock;
645
646 if (method_elm->has_udata) {
647 bundle.driver_udata.outlen =
648 out_len - method_elm->resp_size;
649 out_len = method_elm->resp_size;
650 if (bundle.driver_udata.outlen)
651 bundle.driver_udata.outbuf =
652 u64_to_user_ptr(response +
653 out_len);
654 else
655 bundle.driver_udata.outbuf = NULL;
656 }
657 } else {
658 bundle.driver_udata.outlen = 0;
659 bundle.driver_udata.outbuf = NULL;
660 }
661
c2a939fd
JG
662 ib_uverbs_init_udata_buf_or_null(
663 &bundle.ucore, buf, u64_to_user_ptr(response),
664 in_len, out_len);
a6c4a66a 665 } else {
6284380a 666 buf += sizeof(ex_hdr);
f21519b2 667
c2a939fd 668 ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf,
12f72772
AB
669 u64_to_user_ptr(ex_hdr.response),
670 hdr.in_words * 8, hdr.out_words * 8);
a96e4e2f 671
c2a939fd
JG
672 ib_uverbs_init_udata_buf_or_null(
673 &bundle.driver_udata, buf + bundle.ucore.inlen,
674 u64_to_user_ptr(ex_hdr.response) + bundle.ucore.outlen,
675 ex_hdr.provider_in_words * 8,
676 ex_hdr.provider_out_words * 8);
f21519b2 677
400dbc96 678 }
f21519b2 679
974d6b4b 680 ret = method_elm->handler(&bundle);
0f63ef1d
LR
681 if (bundle.uobject)
682 uverbs_finalize_object(bundle.uobject, UVERBS_ACCESS_NEW, true,
683 !ret, &bundle);
3a6532c9 684out_unlock:
036b1063 685 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
7106a976 686 return (ret) ? : count;
bc38a6ab
RD
687}
688
b86deba9
MK
/* Forward declaration: ib_uverbs_mmap() installs these ops ahead of their definition. */
689static const struct vm_operations_struct rdma_umap_ops;
690
bc38a6ab
RD
691static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
692{
693 struct ib_uverbs_file *file = filp->private_data;
22fa27fb 694 struct ib_ucontext *ucontext;
036b1063
YH
695 int ret = 0;
696 int srcu_key;
bc38a6ab 697
036b1063 698 srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
8313c10f 699 ucontext = ib_uverbs_get_ucontext_file(file);
22fa27fb
JG
700 if (IS_ERR(ucontext)) {
701 ret = PTR_ERR(ucontext);
036b1063
YH
702 goto out;
703 }
51976c6c
CT
704
705 mutex_lock(&file->disassociation_lock);
706
b86deba9 707 vma->vm_ops = &rdma_umap_ops;
3023a1e9 708 ret = ucontext->device->ops.mmap(ucontext, vma);
51976c6c
CT
709
710 mutex_unlock(&file->disassociation_lock);
036b1063
YH
711out:
712 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
713 return ret;
bc38a6ab
RD
714}
715
5f9794dc
JG
716/*
717 * The VMA has been dup'd, initialize the vm_private_data with a new tracking
718 * struct
719 */
720static void rdma_umap_open(struct vm_area_struct *vma)
721{
722 struct ib_uverbs_file *ufile = vma->vm_file->private_data;
723 struct rdma_umap_priv *opriv = vma->vm_private_data;
724 struct rdma_umap_priv *priv;
725
726 if (!opriv)
727 return;
728
729 /* We are racing with disassociation */
730 if (!down_read_trylock(&ufile->hw_destroy_rwsem))
731 goto out_zap;
51976c6c
CT
732 mutex_lock(&ufile->disassociation_lock);
733
5f9794dc
JG
734 /*
735 * Disassociation already completed, the VMA should already be zapped.
736 */
737 if (!ufile->ucontext)
738 goto out_unlock;
739
740 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
741 if (!priv)
742 goto out_unlock;
c043ff2c 743 rdma_umap_priv_init(priv, vma, opriv->entry);
5f9794dc 744
51976c6c 745 mutex_unlock(&ufile->disassociation_lock);
5f9794dc
JG
746 up_read(&ufile->hw_destroy_rwsem);
747 return;
748
749out_unlock:
51976c6c 750 mutex_unlock(&ufile->disassociation_lock);
5f9794dc
JG
751 up_read(&ufile->hw_destroy_rwsem);
752out_zap:
753 /*
754 * We can't allow the VMA to be created with the actual IO pages, that
755 * would break our API contract, and it can't be stopped at this
756 * point, so zap it.
757 */
758 vma->vm_private_data = NULL;
759 zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
760}
761
762static void rdma_umap_close(struct vm_area_struct *vma)
763{
764 struct ib_uverbs_file *ufile = vma->vm_file->private_data;
765 struct rdma_umap_priv *priv = vma->vm_private_data;
766
767 if (!priv)
768 return;
769
770 /*
771 * The vma holds a reference on the struct file that created it, which
772 * in turn means that the ib_uverbs_file is guaranteed to exist at
773 * this point.
774 */
775 mutex_lock(&ufile->umap_lock);
c043ff2c
MK
776 if (priv->entry)
777 rdma_user_mmap_entry_put(priv->entry);
778
5f9794dc
JG
779 list_del(&priv->list);
780 mutex_unlock(&ufile->umap_lock);
781 kfree(priv);
782}
783
67f269b3
JG
784/*
785 * Once the zap_vma_ptes has been called touches to the VMA will come here and
786 * we return a dummy writable zero page for all the pfns.
787 */
788static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
789{
790 struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
791 struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
792 vm_fault_t ret = 0;
793
794 if (!priv)
795 return VM_FAULT_SIGBUS;
796
797 /* Read only pages can just use the system zero page. */
798 if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
6a5c5d26 799 vmf->page = ZERO_PAGE(vmf->address);
67f269b3
JG
800 get_page(vmf->page);
801 return 0;
802 }
803
804 mutex_lock(&ufile->umap_lock);
805 if (!ufile->disassociate_page)
806 ufile->disassociate_page =
807 alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);
808
809 if (ufile->disassociate_page) {
810 /*
811 * This VMA is forced to always be shared so this doesn't have
812 * to worry about COW.
813 */
814 vmf->page = ufile->disassociate_page;
815 get_page(vmf->page);
816 } else {
817 ret = VM_FAULT_SIGBUS;
818 }
819 mutex_unlock(&ufile->umap_lock);
820
821 return ret;
822}
823
5f9794dc
JG
824static const struct vm_operations_struct rdma_umap_ops = {
825 .open = rdma_umap_open,
826 .close = rdma_umap_close,
67f269b3 827 .fault = rdma_umap_fault,
5f9794dc
JG
828};
829
5f9794dc
JG
830void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
831{
832 struct rdma_umap_priv *priv, *next_priv;
833
51976c6c 834 mutex_lock(&ufile->disassociation_lock);
5f9794dc
JG
835
836 while (1) {
837 struct mm_struct *mm = NULL;
838
839 /* Get an arbitrary mm pointer that hasn't been cleaned yet */
840 mutex_lock(&ufile->umap_lock);
7b21b69a
YH
841 while (!list_empty(&ufile->umaps)) {
842 int ret;
843
844 priv = list_first_entry(&ufile->umaps,
845 struct rdma_umap_priv, list);
846 mm = priv->vma->vm_mm;
847 ret = mmget_not_zero(mm);
848 if (!ret) {
849 list_del_init(&priv->list);
39c011a5
JG
850 if (priv->entry) {
851 rdma_user_mmap_entry_put(priv->entry);
852 priv->entry = NULL;
853 }
7b21b69a
YH
854 mm = NULL;
855 continue;
856 }
857 break;
5f9794dc
JG
858 }
859 mutex_unlock(&ufile->umap_lock);
51976c6c
CT
860 if (!mm) {
861 mutex_unlock(&ufile->disassociation_lock);
5f9794dc 862 return;
51976c6c 863 }
5f9794dc
JG
864
865 /*
c1e8d7c6 866 * The umap_lock is nested under mmap_lock since it used within
5f9794dc
JG
867 * the vma_ops callbacks, so we have to clean the list one mm
868 * at a time to get the lock ordering right. Typically there
869 * will only be one mm, so no big deal.
870 */
d8ed45c5 871 mmap_read_lock(mm);
5f9794dc
JG
872 mutex_lock(&ufile->umap_lock);
873 list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
874 list) {
875 struct vm_area_struct *vma = priv->vma;
876
877 if (vma->vm_mm != mm)
878 continue;
879 list_del_init(&priv->list);
880
881 zap_vma_ptes(vma, vma->vm_start,
882 vma->vm_end - vma->vm_start);
c043ff2c
MK
883
884 if (priv->entry) {
885 rdma_user_mmap_entry_put(priv->entry);
886 priv->entry = NULL;
887 }
5f9794dc
JG
888 }
889 mutex_unlock(&ufile->umap_lock);
d8ed45c5 890 mmap_read_unlock(mm);
5f9794dc
JG
891 mmput(mm);
892 }
51976c6c
CT
893
894 mutex_unlock(&ufile->disassociation_lock);
895}
896
897/**
898 * rdma_user_mmap_disassociate() - Revoke mmaps for a device
899 * @device: device to revoke
900 *
901 * This function should be called by drivers that need to disable mmaps for the
902 * device, for instance because it is going to be reset.
903 */
904void rdma_user_mmap_disassociate(struct ib_device *device)
905{
906 struct ib_uverbs_device *uverbs_dev =
907 ib_get_client_data(device, &uverbs_client);
908 struct ib_uverbs_file *ufile;
909
910 mutex_lock(&uverbs_dev->lists_mutex);
911 list_for_each_entry(ufile, &uverbs_dev->uverbs_file_list, list) {
912 if (ufile->ucontext)
913 uverbs_user_mmap_disassociate(ufile);
914 }
915 mutex_unlock(&uverbs_dev->lists_mutex);
5f9794dc 916}
51976c6c 917EXPORT_SYMBOL(rdma_user_mmap_disassociate);
5f9794dc 918
5b2d281a
RD
919/*
920 * ib_uverbs_open() does not need the BKL:
921 *
2a72f212 922 * - the ib_uverbs_device structures are properly reference counted and
5b2d281a
RD
923 * everything else is purely local to the file being created, so
924 * races against other open calls are not a problem;
925 * - there is no ioctl method to race against;
2a72f212
AC
926 * - the open method will either immediately run -ENXIO, or all
927 * required initialization will be done.
5b2d281a 928 */
bc38a6ab
RD
929static int ib_uverbs_open(struct inode *inode, struct file *filp)
930{
70a30e16 931 struct ib_uverbs_device *dev;
bc38a6ab 932 struct ib_uverbs_file *file;
036b1063 933 struct ib_device *ib_dev;
70a30e16 934 int ret;
036b1063
YH
935 int module_dependent;
936 int srcu_key;
bc38a6ab 937
2a72f212 938 dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
ec9bf373 939 if (!refcount_inc_not_zero(&dev->refcount))
70a30e16
RD
940 return -ENXIO;
941
c5c4d92e 942 get_device(&dev->dev);
036b1063
YH
943 srcu_key = srcu_read_lock(&dev->disassociate_srcu);
944 mutex_lock(&dev->lists_mutex);
945 ib_dev = srcu_dereference(dev->ib_dev,
946 &dev->disassociate_srcu);
947 if (!ib_dev) {
948 ret = -EIO;
70a30e16
RD
949 goto err;
950 }
bc38a6ab 951
41c61401
PP
952 if (!rdma_dev_access_netns(ib_dev, current->nsproxy->net_ns)) {
953 ret = -EPERM;
954 goto err;
955 }
956
036b1063
YH
957 /* In case IB device supports disassociate ucontext, there is no hard
958 * dependency between uverbs device and its low level device.
959 */
3023a1e9 960 module_dependent = !(ib_dev->ops.disassociate_ucontext);
036b1063
YH
961
962 if (module_dependent) {
7a154142 963 if (!try_module_get(ib_dev->ops.owner)) {
036b1063
YH
964 ret = -ENODEV;
965 goto err;
966 }
967 }
968
969 file = kzalloc(sizeof(*file), GFP_KERNEL);
63c47c28 970 if (!file) {
70a30e16 971 ret = -ENOMEM;
036b1063
YH
972 if (module_dependent)
973 goto err_module;
974
975 goto err;
63c47c28 976 }
bc38a6ab 977
70a30e16 978 file->device = dev;
bc38a6ab 979 kref_init(&file->ref);
e951747a 980 mutex_init(&file->ucontext_lock);
bc38a6ab 981
87064277 982 spin_lock_init(&file->uobjects_lock);
6a5e9c88 983 INIT_LIST_HEAD(&file->uobjects);
87064277 984 init_rwsem(&file->hw_destroy_rwsem);
5f9794dc
JG
985 mutex_init(&file->umap_lock);
986 INIT_LIST_HEAD(&file->umaps);
6a5e9c88 987
51976c6c
CT
988 mutex_init(&file->disassociation_lock);
989
bc38a6ab 990 filp->private_data = file;
036b1063
YH
991 list_add_tail(&file->list, &dev->uverbs_file_list);
992 mutex_unlock(&dev->lists_mutex);
993 srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
bc38a6ab 994
0f50d88a
JG
995 setup_ufile_idr_uobject(file);
996
c5bf68fe 997 return stream_open(inode, filp);
70a30e16
RD
998
999err_module:
7a154142 1000 module_put(ib_dev->ops.owner);
70a30e16
RD
1001
1002err:
036b1063
YH
1003 mutex_unlock(&dev->lists_mutex);
1004 srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
ec9bf373 1005 if (refcount_dec_and_test(&dev->refcount))
35d4a0b6
YH
1006 ib_uverbs_comp_dev(dev);
1007
c5c4d92e 1008 put_device(&dev->dev);
70a30e16 1009 return ret;
bc38a6ab
RD
1010}
1011
1012static int ib_uverbs_close(struct inode *inode, struct file *filp)
1013{
1014 struct ib_uverbs_file *file = filp->private_data;
d1e09f30 1015
e951747a 1016 uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
036b1063
YH
1017
1018 mutex_lock(&file->device->lists_mutex);
6ebce447 1019 list_del_init(&file->list);
036b1063 1020 mutex_unlock(&file->device->lists_mutex);
70a30e16 1021
bc38a6ab
RD
1022 kref_put(&file->ref, ib_uverbs_release_file);
1023
1024 return 0;
1025}
1026
2b8693c0 1027static const struct file_operations uverbs_fops = {
9afed76d
AC
1028 .owner = THIS_MODULE,
1029 .write = ib_uverbs_write,
1030 .open = ib_uverbs_open,
bc1db9af 1031 .release = ib_uverbs_close,
8eb19e8e 1032 .unlocked_ioctl = ib_uverbs_ioctl,
1832f2d8 1033 .compat_ioctl = compat_ptr_ioctl,
bc38a6ab
RD
1034};
1035
2b8693c0 1036static const struct file_operations uverbs_mmap_fops = {
9afed76d
AC
1037 .owner = THIS_MODULE,
1038 .write = ib_uverbs_write,
bc38a6ab 1039 .mmap = ib_uverbs_mmap,
9afed76d 1040 .open = ib_uverbs_open,
bc1db9af 1041 .release = ib_uverbs_close,
8eb19e8e 1042 .unlocked_ioctl = ib_uverbs_ioctl,
1832f2d8 1043 .compat_ioctl = compat_ptr_ioctl,
bc38a6ab
RD
1044};
1045
8f71bb00
JG
1046static int ib_uverbs_get_nl_info(struct ib_device *ibdev, void *client_data,
1047 struct ib_client_nl_info *res)
1048{
1049 struct ib_uverbs_device *uverbs_dev = client_data;
1050 int ret;
1051
1052 if (res->port != -1)
1053 return -EINVAL;
1054
1055 res->abi = ibdev->ops.uverbs_abi_ver;
1056 res->cdev = &uverbs_dev->dev;
1057
1058 /*
1059 * To support DRIVER_ID binding in userspace some of the driver need
1060 * upgrading to expose their PCI dependent revision information
1061 * through get_context instead of relying on modalias matching. When
1062 * the drivers are fixed they can drop this flag.
1063 */
1064 if (!ibdev->ops.uverbs_no_driver_id_binding) {
1065 ret = nla_put_u32(res->nl_msg, RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID,
1066 ibdev->ops.driver_id);
1067 if (ret)
1068 return ret;
1069 }
1070 return 0;
1071}
1072
bc38a6ab
RD
1073static struct ib_client uverbs_client = {
1074 .name = "uverbs",
6780c4fa 1075 .no_kverbs_req = true,
bc38a6ab 1076 .add = ib_uverbs_add_one,
8f71bb00
JG
1077 .remove = ib_uverbs_remove_one,
1078 .get_nl_info = ib_uverbs_get_nl_info,
bc38a6ab 1079};
8f71bb00 1080MODULE_ALIAS_RDMA_CLIENT("uverbs");
bc38a6ab 1081
b53b1c08 1082static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
f4e91eb4 1083 char *buf)
bc38a6ab 1084{
c5c4d92e
PP
1085 struct ib_uverbs_device *dev =
1086 container_of(device, struct ib_uverbs_device, dev);
036b1063
YH
1087 int ret = -ENODEV;
1088 int srcu_key;
036b1063 1089 struct ib_device *ib_dev;
70a30e16 1090
036b1063
YH
1091 srcu_key = srcu_read_lock(&dev->disassociate_srcu);
1092 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
1093 if (ib_dev)
1c7fd726 1094 ret = sysfs_emit(buf, "%s\n", dev_name(&ib_dev->dev));
036b1063
YH
1095 srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
1096
1097 return ret;
bc38a6ab 1098}
b53b1c08 1099static DEVICE_ATTR_RO(ibdev);
bc38a6ab 1100
b53b1c08
PP
1101static ssize_t abi_version_show(struct device *device,
1102 struct device_attribute *attr, char *buf)
274c0891 1103{
c5c4d92e
PP
1104 struct ib_uverbs_device *dev =
1105 container_of(device, struct ib_uverbs_device, dev);
036b1063
YH
1106 int ret = -ENODEV;
1107 int srcu_key;
1108 struct ib_device *ib_dev;
70a30e16 1109
036b1063
YH
1110 srcu_key = srcu_read_lock(&dev->disassociate_srcu);
1111 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
1112 if (ib_dev)
1c7fd726 1113 ret = sysfs_emit(buf, "%u\n", ib_dev->ops.uverbs_abi_ver);
036b1063 1114 srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
274c0891 1115
036b1063 1116 return ret;
274c0891 1117}
b53b1c08
PP
1118static DEVICE_ATTR_RO(abi_version);
1119
1120static struct attribute *ib_dev_attrs[] = {
1121 &dev_attr_abi_version.attr,
1122 &dev_attr_ibdev.attr,
1123 NULL,
1124};
1125
1126static const struct attribute_group dev_attr_group = {
1127 .attrs = ib_dev_attrs,
1128};
274c0891 1129
0933e2d9
AK
1130static CLASS_ATTR_STRING(abi_version, S_IRUGO,
1131 __stringify(IB_USER_VERBS_ABI_VERSION));
bc38a6ab 1132
7d96c9b1
JG
1133static int ib_uverbs_create_uapi(struct ib_device *device,
1134 struct ib_uverbs_device *uverbs_dev)
1135{
9ed3e5f4 1136 struct uverbs_api *uapi;
7d96c9b1 1137
6829c1c2 1138 uapi = uverbs_alloc_api(device);
51d0a2b4 1139 if (IS_ERR(uapi))
9ed3e5f4 1140 return PTR_ERR(uapi);
9ed3e5f4 1141
9ed3e5f4 1142 uverbs_dev->uapi = uapi;
7d96c9b1
JG
1143 return 0;
1144}
1145
11a0ae4c 1146static int ib_uverbs_add_one(struct ib_device *device)
bc38a6ab 1147{
38707980 1148 int devnum;
ddbd6883 1149 dev_t base;
bc38a6ab 1150 struct ib_uverbs_device *uverbs_dev;
036b1063 1151 int ret;
bc38a6ab 1152
bca51197
MZ
1153 if (!device->ops.alloc_ucontext ||
1154 device->type == RDMA_DEVICE_TYPE_SMI)
11a0ae4c 1155 return -EOPNOTSUPP;
bc38a6ab 1156
08f0e161 1157 uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
bc38a6ab 1158 if (!uverbs_dev)
11a0ae4c 1159 return -ENOMEM;
bc38a6ab 1160
036b1063
YH
1161 ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
1162 if (ret) {
1163 kfree(uverbs_dev);
11a0ae4c 1164 return -ENOMEM;
036b1063
YH
1165 }
1166
00991039 1167 device_initialize(&uverbs_dev->dev);
64917f4c 1168 uverbs_dev->dev.class = &uverbs_class;
00991039
JG
1169 uverbs_dev->dev.parent = device->dev.parent;
1170 uverbs_dev->dev.release = ib_uverbs_release_dev;
1171 uverbs_dev->groups[0] = &dev_attr_group;
1172 uverbs_dev->dev.groups = uverbs_dev->groups;
ec9bf373 1173 refcount_set(&uverbs_dev->refcount, 1);
fd60ae40 1174 init_completion(&uverbs_dev->comp);
53d0bd1e
SH
1175 uverbs_dev->xrcd_tree = RB_ROOT;
1176 mutex_init(&uverbs_dev->xrcd_tree_mutex);
036b1063
YH
1177 mutex_init(&uverbs_dev->lists_mutex);
1178 INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
00991039
JG
1179 rcu_assign_pointer(uverbs_dev->ib_dev, device);
1180 uverbs_dev->num_comp_vectors = device->num_comp_vectors;
70a30e16 1181
90f6e41c
LR
1182 devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
1183 GFP_KERNEL);
11a0ae4c
JG
1184 if (devnum < 0) {
1185 ret = -ENOMEM;
8cf12d77 1186 goto err;
11a0ae4c 1187 }
8cf12d77 1188 uverbs_dev->devnum = devnum;
8cf12d77
HN
1189 if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
1190 base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
1191 else
1192 base = IB_UVERBS_BASE_DEV + devnum;
bc38a6ab 1193
11a0ae4c
JG
1194 ret = ib_uverbs_create_uapi(device, uverbs_dev);
1195 if (ret)
08e74be1 1196 goto err_uapi;
7d96c9b1 1197
c5c4d92e 1198 uverbs_dev->dev.devt = base;
c5c4d92e
PP
1199 dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);
1200
1201 cdev_init(&uverbs_dev->cdev,
3023a1e9 1202 device->ops.mmap ? &uverbs_mmap_fops : &uverbs_fops);
055422dd 1203 uverbs_dev->cdev.owner = THIS_MODULE;
bc38a6ab 1204
c5c4d92e
PP
1205 ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
1206 if (ret)
00991039 1207 goto err_uapi;
bc38a6ab 1208
bc38a6ab 1209 ib_set_client_data(device, &uverbs_client, uverbs_dev);
11a0ae4c 1210 return 0;
bc38a6ab 1211
08e74be1 1212err_uapi:
90f6e41c 1213 ida_free(&uverbs_ida, devnum);
bc38a6ab 1214err:
ec9bf373 1215 if (refcount_dec_and_test(&uverbs_dev->refcount))
35d4a0b6 1216 ib_uverbs_comp_dev(uverbs_dev);
fd60ae40 1217 wait_for_completion(&uverbs_dev->comp);
00991039 1218 put_device(&uverbs_dev->dev);
11a0ae4c 1219 return ret;
bc38a6ab
RD
1220}
1221
036b1063
YH
1222static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
1223 struct ib_device *ib_dev)
1224{
1225 struct ib_uverbs_file *file;
036b1063
YH
1226
1227 /* Pending running commands to terminate */
9ed3e5f4 1228 uverbs_disassociate_api_pre(uverbs_dev);
036b1063
YH
1229
1230 mutex_lock(&uverbs_dev->lists_mutex);
1231 while (!list_empty(&uverbs_dev->uverbs_file_list)) {
036b1063
YH
1232 file = list_first_entry(&uverbs_dev->uverbs_file_list,
1233 struct ib_uverbs_file, list);
6ebce447 1234 list_del_init(&file->list);
036b1063 1235 kref_get(&file->ref);
d1e09f30 1236
e951747a
JG
1237 /* We must release the mutex before going ahead and calling
1238 * uverbs_cleanup_ufile, as it might end up indirectly calling
1239 * uverbs_close, for example due to freeing the resources (e.g
1240 * mmput).
d1e09f30 1241 */
e951747a 1242 mutex_unlock(&uverbs_dev->lists_mutex);
036b1063 1243
e951747a 1244 uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
036b1063 1245 kref_put(&file->ref, ib_uverbs_release_file);
e951747a
JG
1246
1247 mutex_lock(&uverbs_dev->lists_mutex);
036b1063 1248 }
036b1063 1249 mutex_unlock(&uverbs_dev->lists_mutex);
9ed3e5f4
JG
1250
1251 uverbs_disassociate_api(uverbs_dev->uapi);
036b1063
YH
1252}
1253
7c1eb45a 1254static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
bc38a6ab 1255{
7c1eb45a 1256 struct ib_uverbs_device *uverbs_dev = client_data;
036b1063 1257 int wait_clients = 1;
bc38a6ab 1258
c5c4d92e 1259 cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
90f6e41c 1260 ida_free(&uverbs_ida, uverbs_dev->devnum);
fd60ae40 1261
3023a1e9 1262 if (device->ops.disassociate_ucontext) {
036b1063
YH
1263 /* We disassociate HW resources and immediately return.
1264 * Userspace will see a EIO errno for all future access.
1265 * Upon returning, ib_device may be freed internally and is not
1266 * valid any more.
1267 * uverbs_device is still available until all clients close
1268 * their files, then the uverbs device ref count will be zero
1269 * and its resources will be freed.
1270 * Note: At this point no more files can be opened since the
1271 * cdev was deleted, however active clients can still issue
1272 * commands and close their open files.
1273 */
036b1063
YH
1274 ib_uverbs_free_hw_resources(uverbs_dev, device);
1275 wait_clients = 0;
1276 }
1277
ec9bf373 1278 if (refcount_dec_and_test(&uverbs_dev->refcount))
35d4a0b6 1279 ib_uverbs_comp_dev(uverbs_dev);
036b1063
YH
1280 if (wait_clients)
1281 wait_for_completion(&uverbs_dev->comp);
52427112 1282
c5c4d92e 1283 put_device(&uverbs_dev->dev);
bc38a6ab
RD
1284}
1285
bc38a6ab
RD
1286static int __init ib_uverbs_init(void)
1287{
1288 int ret;
1289
8cf12d77
HN
1290 ret = register_chrdev_region(IB_UVERBS_BASE_DEV,
1291 IB_UVERBS_NUM_FIXED_MINOR,
bc38a6ab
RD
1292 "infiniband_verbs");
1293 if (ret) {
aba25a3e 1294 pr_err("user_verbs: couldn't register device number\n");
bc38a6ab
RD
1295 goto out;
1296 }
1297
8cf12d77
HN
1298 ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0,
1299 IB_UVERBS_NUM_DYNAMIC_MINOR,
1300 "infiniband_verbs");
1301 if (ret) {
1302 pr_err("couldn't register dynamic device number\n");
1303 goto out_alloc;
1304 }
1305
64917f4c
IO
1306 ret = class_register(&uverbs_class);
1307 if (ret) {
aba25a3e 1308 pr_err("user_verbs: couldn't create class infiniband_verbs\n");
bc38a6ab
RD
1309 goto out_chrdev;
1310 }
1311
64917f4c 1312 ret = class_create_file(&uverbs_class, &class_attr_abi_version.attr);
bc38a6ab 1313 if (ret) {
aba25a3e 1314 pr_err("user_verbs: couldn't create abi_version attribute\n");
bc38a6ab
RD
1315 goto out_class;
1316 }
1317
bc38a6ab
RD
1318 ret = ib_register_client(&uverbs_client);
1319 if (ret) {
aba25a3e 1320 pr_err("user_verbs: couldn't register client\n");
a265e558 1321 goto out_class;
bc38a6ab
RD
1322 }
1323
1324 return 0;
1325
bc38a6ab 1326out_class:
64917f4c 1327 class_unregister(&uverbs_class);
bc38a6ab
RD
1328
1329out_chrdev:
8cf12d77
HN
1330 unregister_chrdev_region(dynamic_uverbs_dev,
1331 IB_UVERBS_NUM_DYNAMIC_MINOR);
1332
1333out_alloc:
1334 unregister_chrdev_region(IB_UVERBS_BASE_DEV,
1335 IB_UVERBS_NUM_FIXED_MINOR);
bc38a6ab
RD
1336
1337out:
1338 return ret;
1339}
1340
1341static void __exit ib_uverbs_cleanup(void)
1342{
1343 ib_unregister_client(&uverbs_client);
64917f4c 1344 class_unregister(&uverbs_class);
8cf12d77
HN
1345 unregister_chrdev_region(IB_UVERBS_BASE_DEV,
1346 IB_UVERBS_NUM_FIXED_MINOR);
1347 unregister_chrdev_region(dynamic_uverbs_dev,
1348 IB_UVERBS_NUM_DYNAMIC_MINOR);
61e51682 1349 ib_cleanup_ucaps();
c571feca 1350 mmu_notifier_synchronize();
bc38a6ab
RD
1351}
1352
1353module_init(ib_uverbs_init);
1354module_exit(ib_uverbs_cleanup);