]> git.ipfire.org Git - thirdparty/linux.git/blame - drivers/gpu/drm/i915/gem/i915_gem_userptr.c
mmap locking API: convert mmap_sem comments
[thirdparty/linux.git] / drivers / gpu / drm / i915 / gem / i915_gem_userptr.c
CommitLineData
5cc9ed4b 1/*
10be98a7 2 * SPDX-License-Identifier: MIT
5cc9ed4b 3 *
10be98a7 4 * Copyright © 2012-2014 Intel Corporation
5cc9ed4b
CW
5 */
6
5cc9ed4b
CW
7#include <linux/mmu_context.h>
8#include <linux/mmu_notifier.h>
9#include <linux/mempolicy.h>
10#include <linux/swap.h>
6e84f315 11#include <linux/sched/mm.h>
5cc9ed4b 12
6da4a2c4 13#include "i915_drv.h"
10be98a7
CW
14#include "i915_gem_ioctls.h"
15#include "i915_gem_object.h"
37d63f8f 16#include "i915_scatterlist.h"
afa13085 17
ad46cb53
CW
18struct i915_mm_struct {
19 struct mm_struct *mm;
f470b190 20 struct drm_i915_private *i915;
ad46cb53
CW
21 struct i915_mmu_notifier *mn;
22 struct hlist_node node;
23 struct kref kref;
24 struct work_struct work;
25};
26
5cc9ed4b
CW
27#if defined(CONFIG_MMU_NOTIFIER)
28#include <linux/interval_tree.h>
29
30struct i915_mmu_notifier {
31 spinlock_t lock;
32 struct hlist_node node;
33 struct mmu_notifier mn;
f808c13f 34 struct rb_root_cached objects;
484d9a84 35 struct i915_mm_struct *mm;
5cc9ed4b
CW
36};
37
38struct i915_mmu_object {
ad46cb53 39 struct i915_mmu_notifier *mn;
768e159f 40 struct drm_i915_gem_object *obj;
5cc9ed4b 41 struct interval_tree_node it;
5cc9ed4b
CW
42};
43
484d9a84 44static void add_object(struct i915_mmu_object *mo)
ec8b0dd5 45{
484d9a84
CW
46 GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
47 interval_tree_insert(&mo->it, &mo->mn->objects);
ec8b0dd5
CW
48}
49
484d9a84 50static void del_object(struct i915_mmu_object *mo)
ec8b0dd5 51{
484d9a84 52 if (RB_EMPTY_NODE(&mo->it.rb))
768e159f 53 return;
ec8b0dd5 54
484d9a84
CW
55 interval_tree_remove(&mo->it, &mo->mn->objects);
56 RB_CLEAR_NODE(&mo->it.rb);
768e159f
CW
57}
58
484d9a84
CW
59static void
60__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
768e159f 61{
484d9a84
CW
62 struct i915_mmu_object *mo = obj->userptr.mmu_object;
63
64 /*
65 * During mm_invalidate_range we need to cancel any userptr that
66 * overlaps the range being invalidated. Doing so requires the
67 * struct_mutex, and that risks recursion. In order to cause
68 * recursion, the user must alias the userptr address space with
69 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
70 * to invalidate that mmaping, mm_invalidate_range is called with
71 * the userptr address *and* the struct_mutex held. To prevent that
72 * we set a flag under the i915_mmu_notifier spinlock to indicate
73 * whether this object is valid.
74 */
75 if (!mo)
768e159f
CW
76 return;
77
484d9a84
CW
78 spin_lock(&mo->mn->lock);
79 if (value)
80 add_object(mo);
81 else
82 del_object(mo);
83 spin_unlock(&mo->mn->lock);
ec8b0dd5
CW
84}
85
484d9a84
CW
86static int
87userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
88 const struct mmu_notifier_range *range)
5cc9ed4b 89{
380996aa
CW
90 struct i915_mmu_notifier *mn =
91 container_of(_mn, struct i915_mmu_notifier, mn);
768e159f 92 struct interval_tree_node *it;
5d6527a7 93 unsigned long end;
484d9a84 94 int ret = 0;
768e159f 95
f808c13f 96 if (RB_EMPTY_ROOT(&mn->objects.rb_root))
93065ac7 97 return 0;
380996aa
CW
98
99 /* interval ranges are inclusive, but invalidate range is exclusive */
5d6527a7 100 end = range->end - 1;
380996aa
CW
101
102 spin_lock(&mn->lock);
5d6527a7 103 it = interval_tree_iter_first(&mn->objects, range->start, end);
768e159f 104 while (it) {
484d9a84
CW
105 struct drm_i915_gem_object *obj;
106
dfcd6660 107 if (!mmu_notifier_range_blockable(range)) {
484d9a84
CW
108 ret = -EAGAIN;
109 break;
93065ac7 110 }
484d9a84
CW
111
112 /*
113 * The mmu_object is released late when destroying the
768e159f
CW
114 * GEM object so it is entirely possible to gain a
115 * reference on an object in the process of being freed
116 * since our serialisation is via the spinlock and not
117 * the struct_mutex - and consequently use it after it
118 * is freed and then double free it. To prevent that
119 * use-after-free we only acquire a reference on the
120 * object if it is not in the process of being destroyed.
121 */
484d9a84
CW
122 obj = container_of(it, struct i915_mmu_object, it)->obj;
123 if (!kref_get_unless_zero(&obj->base.refcount)) {
124 it = interval_tree_iter_next(it, range->start, end);
125 continue;
126 }
127 spin_unlock(&mn->lock);
128
c03467ba 129 ret = i915_gem_object_unbind(obj,
93e89ac8
CW
130 I915_GEM_OBJECT_UNBIND_ACTIVE |
131 I915_GEM_OBJECT_UNBIND_BARRIER);
484d9a84 132 if (ret == 0)
f86dbacb 133 ret = __i915_gem_object_put_pages(obj);
484d9a84
CW
134 i915_gem_object_put(obj);
135 if (ret)
2850748e 136 return ret;
5cc9ed4b 137
484d9a84
CW
138 spin_lock(&mn->lock);
139
140 /*
141 * As we do not (yet) protect the mmu from concurrent insertion
142 * over this range, there is no guarantee that this search will
143 * terminate given a pathologic workload.
144 */
145 it = interval_tree_iter_first(&mn->objects, range->start, end);
5cc9ed4b 146 }
380996aa 147 spin_unlock(&mn->lock);
393afc2c 148
484d9a84 149 return ret;
93065ac7 150
5cc9ed4b
CW
151}
152
153static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
484d9a84 154 .invalidate_range_start = userptr_mn_invalidate_range_start,
5cc9ed4b
CW
155};
156
157static struct i915_mmu_notifier *
484d9a84 158i915_mmu_notifier_create(struct i915_mm_struct *mm)
5cc9ed4b 159{
ad46cb53 160 struct i915_mmu_notifier *mn;
5cc9ed4b 161
ad46cb53
CW
162 mn = kmalloc(sizeof(*mn), GFP_KERNEL);
163 if (mn == NULL)
5cc9ed4b
CW
164 return ERR_PTR(-ENOMEM);
165
ad46cb53
CW
166 spin_lock_init(&mn->lock);
167 mn->mn.ops = &i915_gem_userptr_notifier;
f808c13f 168 mn->objects = RB_ROOT_CACHED;
484d9a84 169 mn->mm = mm;
ad46cb53 170
ad46cb53 171 return mn;
5cc9ed4b
CW
172}
173
5cc9ed4b
CW
174static void
175i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
176{
ad46cb53 177 struct i915_mmu_object *mo;
5cc9ed4b 178
484d9a84
CW
179 mo = fetch_and_zero(&obj->userptr.mmu_object);
180 if (!mo)
5cc9ed4b
CW
181 return;
182
768e159f
CW
183 spin_lock(&mo->mn->lock);
184 del_object(mo);
185 spin_unlock(&mo->mn->lock);
ad46cb53 186 kfree(mo);
ad46cb53
CW
187}
188
189static struct i915_mmu_notifier *
190i915_mmu_notifier_find(struct i915_mm_struct *mm)
191{
7741b547
DV
192 struct i915_mmu_notifier *mn;
193 int err = 0;
e9681366
CW
194
195 mn = mm->mn;
196 if (mn)
197 return mn;
198
484d9a84 199 mn = i915_mmu_notifier_create(mm);
7741b547
DV
200 if (IS_ERR(mn))
201 err = PTR_ERR(mn);
202
d8ed45c5 203 mmap_write_lock(mm->mm);
f470b190 204 mutex_lock(&mm->i915->mm_lock);
7741b547 205 if (mm->mn == NULL && !err) {
c1e8d7c6 206 /* Protected by mmap_lock (write-lock) */
7741b547
DV
207 err = __mmu_notifier_register(&mn->mn, mm->mm);
208 if (!err) {
209 /* Protected by mm_lock */
210 mm->mn = fetch_and_zero(&mn);
211 }
cb8d50df
TU
212 } else if (mm->mn) {
213 /*
214 * Someone else raced and successfully installed the mmu
215 * notifier, we can cancel our own errors.
216 */
7741b547 217 err = 0;
ad46cb53 218 }
f470b190 219 mutex_unlock(&mm->i915->mm_lock);
d8ed45c5 220 mmap_write_unlock(mm->mm);
e9681366 221
484d9a84 222 if (mn && !IS_ERR(mn))
7741b547 223 kfree(mn);
7741b547
DV
224
225 return err ? ERR_PTR(err) : mm->mn;
5cc9ed4b
CW
226}
227
228static int
229i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
230 unsigned flags)
231{
ad46cb53
CW
232 struct i915_mmu_notifier *mn;
233 struct i915_mmu_object *mo;
5cc9ed4b
CW
234
235 if (flags & I915_USERPTR_UNSYNCHRONIZED)
236 return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
237
ad46cb53
CW
238 if (WARN_ON(obj->userptr.mm == NULL))
239 return -EINVAL;
5cc9ed4b 240
ad46cb53
CW
241 mn = i915_mmu_notifier_find(obj->userptr.mm);
242 if (IS_ERR(mn))
243 return PTR_ERR(mn);
5cc9ed4b 244
ad46cb53 245 mo = kzalloc(sizeof(*mo), GFP_KERNEL);
484d9a84 246 if (!mo)
ad46cb53 247 return -ENOMEM;
5cc9ed4b 248
ad46cb53 249 mo->mn = mn;
ad46cb53 250 mo->obj = obj;
768e159f
CW
251 mo->it.start = obj->userptr.ptr;
252 mo->it.last = obj->userptr.ptr + obj->base.size - 1;
484d9a84 253 RB_CLEAR_NODE(&mo->it.rb);
ad46cb53
CW
254
255 obj->userptr.mmu_object = mo;
5cc9ed4b 256 return 0;
ad46cb53
CW
257}
258
259static void
260i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
261 struct mm_struct *mm)
262{
263 if (mn == NULL)
264 return;
5cc9ed4b 265
ad46cb53 266 mmu_notifier_unregister(&mn->mn, mm);
5cc9ed4b 267 kfree(mn);
5cc9ed4b
CW
268}
269
270#else
271
484d9a84
CW
272static void
273__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
274{
275}
276
5cc9ed4b
CW
/* Stub for builds without CONFIG_MMU_NOTIFIER: nothing was ever attached. */
static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
}
281
282static int
283i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
284 unsigned flags)
285{
286 if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
287 return -ENODEV;
288
289 if (!capable(CAP_SYS_ADMIN))
290 return -EPERM;
291
292 return 0;
293}
ad46cb53
CW
294
/* Stub for builds without CONFIG_MMU_NOTIFIER: no notifier exists to free. */
static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
		       struct mm_struct *mm)
{
}
300
5cc9ed4b
CW
301#endif
302
ad46cb53
CW
303static struct i915_mm_struct *
304__i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real)
305{
306 struct i915_mm_struct *mm;
307
308 /* Protected by dev_priv->mm_lock */
309 hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real)
310 if (mm->mm == real)
311 return mm;
312
313 return NULL;
314}
315
316static int
317i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
318{
319 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
320 struct i915_mm_struct *mm;
321 int ret = 0;
322
323 /* During release of the GEM object we hold the struct_mutex. This
324 * precludes us from calling mmput() at that time as that may be
325 * the last reference and so call exit_mmap(). exit_mmap() will
326 * attempt to reap the vma, and if we were holding a GTT mmap
327 * would then call drm_gem_vm_close() and attempt to reacquire
328 * the struct mutex. So in order to avoid that recursion, we have
329 * to defer releasing the mm reference until after we drop the
330 * struct_mutex, i.e. we need to schedule a worker to do the clean
331 * up.
332 */
333 mutex_lock(&dev_priv->mm_lock);
334 mm = __i915_mm_struct_find(dev_priv, current->mm);
335 if (mm == NULL) {
336 mm = kmalloc(sizeof(*mm), GFP_KERNEL);
337 if (mm == NULL) {
338 ret = -ENOMEM;
339 goto out;
340 }
341
342 kref_init(&mm->kref);
f470b190 343 mm->i915 = to_i915(obj->base.dev);
ad46cb53
CW
344
345 mm->mm = current->mm;
f1f10076 346 mmgrab(current->mm);
ad46cb53
CW
347
348 mm->mn = NULL;
349
350 /* Protected by dev_priv->mm_lock */
351 hash_add(dev_priv->mm_structs,
352 &mm->node, (unsigned long)mm->mm);
353 } else
354 kref_get(&mm->kref);
355
356 obj->userptr.mm = mm;
357out:
358 mutex_unlock(&dev_priv->mm_lock);
359 return ret;
360}
361
362static void
363__i915_mm_struct_free__worker(struct work_struct *work)
364{
365 struct i915_mm_struct *mm = container_of(work, typeof(*mm), work);
366 i915_mmu_notifier_free(mm->mn, mm->mm);
367 mmdrop(mm->mm);
368 kfree(mm);
369}
370
371static void
372__i915_mm_struct_free(struct kref *kref)
373{
374 struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);
375
376 /* Protected by dev_priv->mm_lock */
377 hash_del(&mm->node);
f470b190 378 mutex_unlock(&mm->i915->mm_lock);
ad46cb53
CW
379
380 INIT_WORK(&mm->work, __i915_mm_struct_free__worker);
8a2421bd 381 queue_work(mm->i915->mm.userptr_wq, &mm->work);
ad46cb53
CW
382}
383
384static void
385i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
386{
387 if (obj->userptr.mm == NULL)
388 return;
389
390 kref_put_mutex(&obj->userptr.mm->kref,
391 __i915_mm_struct_free,
392 &to_i915(obj->base.dev)->mm_lock);
393 obj->userptr.mm = NULL;
394}
395
5cc9ed4b
CW
396struct get_pages_work {
397 struct work_struct work;
398 struct drm_i915_gem_object *obj;
399 struct task_struct *task;
400};
401
03ac84f1 402static struct sg_table *
5602452e 403__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
8e78871b 404 struct page **pvec, unsigned long num_pages)
e2273302 405{
5602452e
TU
406 unsigned int max_segment = i915_sg_segment_size();
407 struct sg_table *st;
84e8978e 408 unsigned int sg_page_sizes;
e2273302
ID
409 int ret;
410
5602452e
TU
411 st = kmalloc(sizeof(*st), GFP_KERNEL);
412 if (!st)
413 return ERR_PTR(-ENOMEM);
414
415alloc_table:
416 ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
417 0, num_pages << PAGE_SHIFT,
418 max_segment,
419 GFP_KERNEL);
420 if (ret) {
421 kfree(st);
03ac84f1 422 return ERR_PTR(ret);
5602452e 423 }
e2273302 424
5602452e 425 ret = i915_gem_gtt_prepare_pages(obj, st);
e2273302 426 if (ret) {
5602452e
TU
427 sg_free_table(st);
428
429 if (max_segment > PAGE_SIZE) {
430 max_segment = PAGE_SIZE;
431 goto alloc_table;
432 }
433
434 kfree(st);
03ac84f1 435 return ERR_PTR(ret);
e2273302
ID
436 }
437
84e8978e 438 sg_page_sizes = i915_sg_page_sizes(st->sgl);
a5c08166 439
84e8978e 440 __i915_gem_object_set_pages(obj, st, sg_page_sizes);
b91b09ee 441
5602452e 442 return st;
e2273302
ID
443}
444
5cc9ed4b
CW
445static void
446__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
447{
448 struct get_pages_work *work = container_of(_work, typeof(*work), work);
449 struct drm_i915_gem_object *obj = work->obj;
8e78871b
MA
450 const unsigned long npages = obj->base.size >> PAGE_SHIFT;
451 unsigned long pinned;
5cc9ed4b 452 struct page **pvec;
8e78871b 453 int ret;
5cc9ed4b
CW
454
455 ret = -ENOMEM;
456 pinned = 0;
457
0ee931c4 458 pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
5cc9ed4b 459 if (pvec != NULL) {
ad46cb53 460 struct mm_struct *mm = obj->userptr.mm->mm;
9beae1ea 461 unsigned int flags = 0;
d323c9b8 462 int locked = 0;
9beae1ea 463
0b100760 464 if (!i915_gem_object_is_readonly(obj))
9beae1ea 465 flags |= FOLL_WRITE;
5cc9ed4b 466
40313f0c 467 ret = -EFAULT;
388f7934 468 if (mmget_not_zero(mm)) {
40313f0c 469 while (pinned < npages) {
d323c9b8 470 if (!locked) {
d8ed45c5 471 mmap_read_lock(mm);
d323c9b8
CW
472 locked = 1;
473 }
2170ecfa 474 ret = pin_user_pages_remote
40313f0c
CW
475 (work->task, mm,
476 obj->userptr.ptr + pinned * PAGE_SIZE,
477 npages - pinned,
9beae1ea 478 flags,
d323c9b8 479 pvec + pinned, NULL, &locked);
40313f0c
CW
480 if (ret < 0)
481 break;
482
483 pinned += ret;
484 }
d323c9b8 485 if (locked)
d8ed45c5 486 mmap_read_unlock(mm);
40313f0c 487 mmput(mm);
5cc9ed4b 488 }
5cc9ed4b
CW
489 }
490
f86dbacb 491 mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
68d6c840 492 if (obj->userptr.work == &work->work) {
03ac84f1
CW
493 struct sg_table *pages = ERR_PTR(ret);
494
68d6c840 495 if (pinned == npages) {
5602452e
TU
496 pages = __i915_gem_userptr_alloc_pages(obj, pvec,
497 npages);
03ac84f1 498 if (!IS_ERR(pages)) {
68d6c840 499 pinned = 0;
03ac84f1 500 pages = NULL;
68d6c840 501 }
5cc9ed4b 502 }
03ac84f1
CW
503
504 obj->userptr.work = ERR_CAST(pages);
42953b3c
CW
505 if (IS_ERR(pages))
506 __i915_gem_userptr_set_active(obj, false);
5cc9ed4b 507 }
1233e2db 508 mutex_unlock(&obj->mm.lock);
5cc9ed4b 509
2170ecfa 510 unpin_user_pages(pvec, pinned);
2098105e 511 kvfree(pvec);
5cc9ed4b 512
f0cd5182 513 i915_gem_object_put(obj);
5cc9ed4b
CW
514 put_task_struct(work->task);
515 kfree(work);
516}
517
03ac84f1 518static struct sg_table *
1c8782dd 519__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
e4b946bf
CW
520{
521 struct get_pages_work *work;
522
523 /* Spawn a worker so that we can acquire the
524 * user pages without holding our mutex. Access
c1e8d7c6
ML
525 * to the user pages requires mmap_lock, and we have
526 * a strict lock ordering of mmap_lock, struct_mutex -
e4b946bf
CW
527 * we already hold struct_mutex here and so cannot
528 * call gup without encountering a lock inversion.
529 *
530 * Userspace will keep on repeating the operation
531 * (thanks to EAGAIN) until either we hit the fast
532 * path or the worker completes. If the worker is
533 * cancelled or superseded, the task is still run
534 * but the results ignored. (This leads to
535 * complications that we may have a stray object
536 * refcount that we need to be wary of when
537 * checking for existing objects during creation.)
538 * If the worker encounters an error, it reports
539 * that error back to this function through
540 * obj->userptr.work = ERR_PTR.
541 */
e4b946bf
CW
542 work = kmalloc(sizeof(*work), GFP_KERNEL);
543 if (work == NULL)
03ac84f1 544 return ERR_PTR(-ENOMEM);
e4b946bf
CW
545
546 obj->userptr.work = &work->work;
e4b946bf 547
25dc556a 548 work->obj = i915_gem_object_get(obj);
e4b946bf
CW
549
550 work->task = current;
551 get_task_struct(work->task);
552
553 INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
8a2421bd 554 queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);
e4b946bf 555
03ac84f1 556 return ERR_PTR(-EAGAIN);
e4b946bf
CW
557}
558
b91b09ee 559static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
5cc9ed4b 560{
8e78871b 561 const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
1c8782dd 562 struct mm_struct *mm = obj->userptr.mm->mm;
5cc9ed4b 563 struct page **pvec;
03ac84f1 564 struct sg_table *pages;
e4b946bf 565 bool active;
1c8782dd 566 int pinned;
2170ecfa 567 unsigned int gup_flags = 0;
5cc9ed4b
CW
568
569 /* If userspace should engineer that these pages are replaced in
570 * the vma between us binding this page into the GTT and completion
571 * of rendering... Their loss. If they change the mapping of their
572 * pages they need to create a new bo to point to the new vma.
573 *
574 * However, that still leaves open the possibility of the vma
575 * being copied upon fork. Which falls under the same userspace
576 * synchronisation issue as a regular bo, except that this time
577 * the process may not be expecting that a particular piece of
578 * memory is tied to the GPU.
579 *
580 * Fortunately, we can hook into the mmu_notifier in order to
581 * discard the page references prior to anything nasty happening
582 * to the vma (discard or cloning) which should prevent the more
583 * egregious cases from causing harm.
584 */
364c8172
CW
585
586 if (obj->userptr.work) {
e4b946bf 587 /* active flag should still be held for the pending work */
364c8172 588 if (IS_ERR(obj->userptr.work))
b91b09ee 589 return PTR_ERR(obj->userptr.work);
364c8172 590 else
b91b09ee 591 return -EAGAIN;
364c8172 592 }
e4b946bf 593
5cc9ed4b
CW
594 pvec = NULL;
595 pinned = 0;
5cc9ed4b 596
15c344f4 597 if (mm == current->mm) {
2098105e 598 pvec = kvmalloc_array(num_pages, sizeof(struct page *),
0ee931c4 599 GFP_KERNEL |
1c8782dd
CW
600 __GFP_NORETRY |
601 __GFP_NOWARN);
17839856
LT
602 /*
603 * Using __get_user_pages_fast() with a read-only
604 * access is questionable. A read-only page may be
605 * COW-broken, and then this might end up giving
606 * the wrong side of the COW..
607 *
608 * We may or may not care.
609 */
2170ecfa
JH
610 if (pvec) {
611 /* defer to worker if malloc fails */
612 if (!i915_gem_object_is_readonly(obj))
613 gup_flags |= FOLL_WRITE;
614 pinned = pin_user_pages_fast_only(obj->userptr.ptr,
615 num_pages, gup_flags,
616 pvec);
617 }
5cc9ed4b 618 }
e4b946bf
CW
619
620 active = false;
1c8782dd
CW
621 if (pinned < 0) {
622 pages = ERR_PTR(pinned);
623 pinned = 0;
624 } else if (pinned < num_pages) {
625 pages = __i915_gem_userptr_get_pages_schedule(obj);
626 active = pages == ERR_PTR(-EAGAIN);
627 } else {
5602452e 628 pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
1c8782dd 629 active = !IS_ERR(pages);
5cc9ed4b 630 }
1c8782dd
CW
631 if (active)
632 __i915_gem_userptr_set_active(obj, true);
1c8782dd
CW
633
634 if (IS_ERR(pages))
2170ecfa 635 unpin_user_pages(pvec, pinned);
2098105e 636 kvfree(pvec);
1c8782dd 637
b91b09ee 638 return PTR_ERR_OR_ZERO(pages);
5cc9ed4b
CW
639}
640
641static void
03ac84f1
CW
642i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
643 struct sg_table *pages)
5cc9ed4b 644{
85d1225e
DG
645 struct sgt_iter sgt_iter;
646 struct page *page;
5cc9ed4b 647
484d9a84
CW
648 /* Cancel any inflight work and force them to restart their gup */
649 obj->userptr.work = NULL;
e4b946bf 650 __i915_gem_userptr_set_active(obj, false);
484d9a84
CW
651 if (!pages)
652 return;
5cc9ed4b 653
ee8efa80 654 __i915_gem_object_release_shmem(obj, pages, true);
03ac84f1 655 i915_gem_gtt_finish_pages(obj, pages);
e2273302 656
681c774d
CW
657 /*
658 * We always mark objects as dirty when they are used by the GPU,
659 * just in case. However, if we set the vma as being read-only we know
660 * that the object will never have been written to.
661 */
662 if (i915_gem_object_is_readonly(obj))
663 obj->mm.dirty = false;
664
03ac84f1 665 for_each_sgt_page(page, sgt_iter, pages) {
0d4bbe3d
CW
666 if (obj->mm.dirty && trylock_page(page)) {
667 /*
668 * As this may not be anonymous memory (e.g. shmem)
669 * but exist on a real mapping, we have to lock
670 * the page in order to dirty it -- holding
671 * the page reference is not sufficient to
672 * prevent the inode from being truncated.
673 * Play safe and take the lock.
674 *
675 * However...!
676 *
677 * The mmu-notifier can be invalidated for a
678 * migrate_page, that is alreadying holding the lock
679 * on the page. Such a try_to_unmap() will result
680 * in us calling put_pages() and so recursively try
681 * to lock the page. We avoid that deadlock with
682 * a trylock_page() and in exchange we risk missing
683 * some page dirtying.
684 */
505a8ec7 685 set_page_dirty(page);
0d4bbe3d
CW
686 unlock_page(page);
687 }
5cc9ed4b
CW
688
689 mark_page_accessed(page);
2170ecfa 690 unpin_user_page(page);
5cc9ed4b 691 }
a4f5ea64 692 obj->mm.dirty = false;
5cc9ed4b 693
03ac84f1
CW
694 sg_free_table(pages);
695 kfree(pages);
5cc9ed4b
CW
696}
697
/*
 * release hook for userptr objects: drop the notifier tracking first, then
 * the reference on the per-process i915_mm_struct.
 */
static void
i915_gem_userptr_release(struct drm_i915_gem_object *obj)
{
	i915_gem_userptr_release__mmu_notifier(obj);
	i915_gem_userptr_release__mm_struct(obj);
}
704
705static int
706i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
707{
ad46cb53 708 if (obj->userptr.mmu_object)
5cc9ed4b
CW
709 return 0;
710
711 return i915_gem_userptr_init__mmu_notifier(obj, 0);
712}
713
714static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
3599a91c 715 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
484d9a84 716 I915_GEM_OBJECT_IS_SHRINKABLE |
f6c26b55 717 I915_GEM_OBJECT_NO_MMAP |
484d9a84 718 I915_GEM_OBJECT_ASYNC_CANCEL,
5cc9ed4b
CW
719 .get_pages = i915_gem_userptr_get_pages,
720 .put_pages = i915_gem_userptr_put_pages,
de472664 721 .dmabuf_export = i915_gem_userptr_dmabuf_export,
5cc9ed4b
CW
722 .release = i915_gem_userptr_release,
723};
724
a5a5ae2a 725/*
5cc9ed4b
CW
726 * Creates a new mm object that wraps some normal memory from the process
727 * context - user memory.
728 *
729 * We impose several restrictions upon the memory being mapped
730 * into the GPU.
731 * 1. It must be page aligned (both start/end addresses, i.e ptr and size).
ec8b0dd5 732 * 2. It must be normal system memory, not a pointer into another map of IO
5cc9ed4b 733 * space (e.g. it must not be a GTT mmapping of another object).
ec8b0dd5 734 * 3. We only allow a bo as large as we could in theory map into the GTT,
5cc9ed4b 735 * that is we limit the size to the total size of the GTT.
ec8b0dd5 736 * 4. The bo is marked as being snoopable. The backing pages are left
5cc9ed4b
CW
737 * accessible directly by the CPU, but reads and writes by the GPU may
738 * incur the cost of a snoop (unless you have an LLC architecture).
739 *
740 * Synchronisation between multiple users and the GPU is left to userspace
741 * through the normal set-domain-ioctl. The kernel will enforce that the
742 * GPU relinquishes the VMA before it is returned back to the system
743 * i.e. upon free(), munmap() or process termination. However, the userspace
744 * malloc() library may not immediately relinquish the VMA after free() and
745 * instead reuse it whilst the GPU is still reading and writing to the VMA.
746 * Caveat emptor.
747 *
748 * Also note, that the object created here is not currently a "first class"
749 * object, in that several ioctls are banned. These are the CPU access
750 * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
cc917ab4
CW
751 * direct access via your pointer rather than use those ioctls. Another
752 * restriction is that we do not allow userptr surfaces to be pinned to the
753 * hardware and so we reject any attempt to create a framebuffer out of a
754 * userptr.
5cc9ed4b
CW
755 *
756 * If you think this is a good interface to use to pass GPU memory between
757 * drivers, please use dma-buf instead. In fact, wherever possible use
758 * dma-buf instead.
759 */
760int
a5a5ae2a
CW
761i915_gem_userptr_ioctl(struct drm_device *dev,
762 void *data,
763 struct drm_file *file)
5cc9ed4b 764{
7867d709 765 static struct lock_class_key lock_class;
0031fb96 766 struct drm_i915_private *dev_priv = to_i915(dev);
5cc9ed4b
CW
767 struct drm_i915_gem_userptr *args = data;
768 struct drm_i915_gem_object *obj;
769 int ret;
770 u32 handle;
771
0031fb96 772 if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
ca377809
TU
773 /* We cannot support coherent userptr objects on hw without
774 * LLC and broken snooping.
775 */
776 return -ENODEV;
777 }
778
5cc9ed4b
CW
779 if (args->flags & ~(I915_USERPTR_READ_ONLY |
780 I915_USERPTR_UNSYNCHRONIZED))
781 return -EINVAL;
782
24860ad7
MA
783 /*
784 * XXX: There is a prevalence of the assumption that we fit the
785 * object's page count inside a 32bit _signed_ variable. Let's document
786 * this and catch if we ever need to fix it. In the meantime, if you do
787 * spot such a local variable, please consider fixing!
788 *
789 * Aside from our own locals (for which we have no excuse!):
790 * - sg_table embeds unsigned int for num_pages
791 * - get_user_pages*() mixed ints with longs
792 */
793
794 if (args->user_size >> PAGE_SHIFT > INT_MAX)
795 return -E2BIG;
796
797 if (overflows_type(args->user_size, obj->base.size))
798 return -E2BIG;
799
c11c7bfd
MA
800 if (!args->user_size)
801 return -EINVAL;
802
5cc9ed4b
CW
803 if (offset_in_page(args->user_ptr | args->user_size))
804 return -EINVAL;
805
96d4f267 806 if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
5cc9ed4b
CW
807 return -EFAULT;
808
809 if (args->flags & I915_USERPTR_READ_ONLY) {
0b100760
CW
810 /*
811 * On almost all of the older hw, we cannot tell the GPU that
812 * a page is readonly.
5cc9ed4b 813 */
e6ba7648 814 if (!dev_priv->gt.vm->has_read_only)
0b100760 815 return -ENODEV;
5cc9ed4b
CW
816 }
817
13f1bfd3 818 obj = i915_gem_object_alloc();
5cc9ed4b
CW
819 if (obj == NULL)
820 return -ENOMEM;
821
822 drm_gem_private_object_init(dev, &obj->base, args->user_size);
7867d709 823 i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class);
c0a51fd0
CK
824 obj->read_domains = I915_GEM_DOMAIN_CPU;
825 obj->write_domain = I915_GEM_DOMAIN_CPU;
b8f55be6 826 i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
5cc9ed4b
CW
827
828 obj->userptr.ptr = args->user_ptr;
0b100760
CW
829 if (args->flags & I915_USERPTR_READ_ONLY)
830 i915_gem_object_set_readonly(obj);
5cc9ed4b
CW
831
832 /* And keep a pointer to the current->mm for resolving the user pages
833 * at binding. This means that we need to hook into the mmu_notifier
834 * in order to detect if the mmu is destroyed.
835 */
ad46cb53
CW
836 ret = i915_gem_userptr_init__mm_struct(obj);
837 if (ret == 0)
5cc9ed4b
CW
838 ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
839 if (ret == 0)
840 ret = drm_gem_handle_create(file, &obj->base, &handle);
841
842 /* drop reference from allocate - handle holds it now */
f0cd5182 843 i915_gem_object_put(obj);
5cc9ed4b
CW
844 if (ret)
845 return ret;
846
847 args->handle = handle;
848 return 0;
849}
850
8a2421bd 851int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
5cc9ed4b 852{
ad46cb53
CW
853 mutex_init(&dev_priv->mm_lock);
854 hash_init(dev_priv->mm_structs);
8a2421bd
CW
855
856 dev_priv->mm.userptr_wq =
21cc6431 857 alloc_workqueue("i915-userptr-acquire",
457db89b 858 WQ_HIGHPRI | WQ_UNBOUND,
21cc6431 859 0);
8a2421bd
CW
860 if (!dev_priv->mm.userptr_wq)
861 return -ENOMEM;
862
863 return 0;
864}
865
866void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
867{
868 destroy_workqueue(dev_priv->mm.userptr_wq);
5cc9ed4b 869}