Commit | Line | Data |
---|---|---|
a46a2cd1 FK |
1 | /* |
2 | * Copyright 2014-2018 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | */ | |
c366be54 | 22 | #include <linux/dma-buf.h> |
a46a2cd1 | 23 | #include <linux/list.h> |
548da31d | 24 | #include <linux/pagemap.h> |
5ae0283e | 25 | #include <linux/sched/mm.h> |
c366be54 SR |
26 | #include <linux/sched/task.h> |
27 | ||
a46a2cd1 FK |
28 | #include "amdgpu_object.h" |
29 | #include "amdgpu_vm.h" | |
30 | #include "amdgpu_amdkfd.h" | |
2fbd6f94 | 31 | #include "amdgpu_dma_buf.h" |
1d251d90 | 32 | #include <uapi/linux/kfd_ioctl.h> |
a46a2cd1 | 33 | |
5ae0283e FK |
34 | /* BO flag to indicate a KFD userptr BO */ |
35 | #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) | |
36 | ||
37 | /* Userptr restore delay, just long enough to allow consecutive VM | |
38 | * changes to accumulate | |
39 | */ | |
40 | #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 | |
41 | ||
a46a2cd1 FK |
42 | /* Impose limit on how much memory KFD can use */ |
43 | static struct { | |
44 | uint64_t max_system_mem_limit; | |
5d240da9 | 45 | uint64_t max_ttm_mem_limit; |
a46a2cd1 | 46 | int64_t system_mem_used; |
5d240da9 | 47 | int64_t ttm_mem_used; |
a46a2cd1 FK |
48 | spinlock_t mem_limit_lock; |
49 | } kfd_mem_limit; | |
50 | ||
51 | /* Struct used for amdgpu_amdkfd_bo_validate */ | |
52 | struct amdgpu_vm_parser { | |
53 | uint32_t domain; | |
54 | bool wait; | |
55 | }; | |
56 | ||
57 | static const char * const domain_bit_to_string[] = { | |
58 | "CPU", | |
59 | "GTT", | |
60 | "VRAM", | |
61 | "GDS", | |
62 | "GWS", | |
63 | "OA" | |
64 | }; | |
65 | ||
66 | #define domain_string(domain) domain_bit_to_string[ffs(domain)-1] | |
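/* e.g. AMDGPU_GEM_DOMAIN_VRAM (0x4, bit 2) resolves to "VRAM"; because of ffs(),
 * only the lowest set bit of a combined domain mask is named.
 */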
67 | ||
5ae0283e | 68 | static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work); |
a46a2cd1 FK |
69 | |
70 | ||
71 | static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) | |
72 | { | |
73 | return (struct amdgpu_device *)kgd; | |
74 | } | |
75 | ||
76 | static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, | |
77 | struct kgd_mem *mem) | |
78 | { | |
79 | struct kfd_bo_va_list *entry; | |
80 | ||
81 | list_for_each_entry(entry, &mem->bo_va_list, bo_list) | |
82 | if (entry->bo_va->base.vm == avm) | |
83 | return false; | |
84 | ||
85 | return true; | |
86 | } | |
87 | ||
 88 | /* Set memory usage limits. Currently, limits are |
b72ff190 | 89 |  * System (TTM + userptr) memory - 15/16th System RAM |
5d240da9 | 90 |  * TTM memory - 3/8th System RAM |
a46a2cd1 FK |
91 | */ |
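/* For example, on a machine with 16 GB of system RAM these fractions work out to
 * roughly a 15 GB system memory limit and a 6 GB TTM memory limit.
 */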
92 | void amdgpu_amdkfd_gpuvm_init_mem_limits(void) | |
93 | { | |
94 | struct sysinfo si; | |
95 | uint64_t mem; | |
96 | ||
97 | si_meminfo(&si); | |
98 | mem = si.totalram - si.totalhigh; | |
99 | mem *= si.mem_unit; | |
100 | ||
101 | spin_lock_init(&kfd_mem_limit.mem_limit_lock); | |
b72ff190 | 102 | kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4); |
5d240da9 EH |
103 | kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3); |
104 | pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n", | |
5ae0283e | 105 | (kfd_mem_limit.max_system_mem_limit >> 20), |
5d240da9 | 106 | (kfd_mem_limit.max_ttm_mem_limit >> 20)); |
a46a2cd1 FK |
107 | } |
108 | ||
29a39c90 FK |
109 | /* Estimate page table size needed to represent a given memory size |
110 | * | |
111 | * With 4KB pages, we need one 8 byte PTE for each 4KB of memory | |
112 | * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB | |
113 | * of memory (factor 256K, >> 18). ROCm user mode tries to optimize | |
114 | * for 2MB pages for TLB efficiency. However, small allocations and | |
115 | * fragmented system memory still need some 4KB pages. We choose a | |
116 | * compromise that should work in most cases without reserving too | |
117 | * much memory for page tables unnecessarily (factor 16K, >> 14). | |
118 | */ | |
119 | #define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14) | |
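/* e.g. ESTIMATE_PT_SIZE(64ULL << 30) == 4 MiB reserved for page tables covering 64 GB. */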
120 | ||
611736d8 | 121 | static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, |
5d240da9 | 122 | uint64_t size, u32 domain, bool sg) |
a46a2cd1 | 123 | { |
29a39c90 FK |
124 | uint64_t reserved_for_pt = |
125 | ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); | |
611736d8 | 126 | size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; |
a46a2cd1 FK |
127 | int ret = 0; |
128 | ||
129 | acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, | |
130 | sizeof(struct amdgpu_bo)); | |
131 | ||
611736d8 | 132 | vram_needed = 0; |
a46a2cd1 | 133 | if (domain == AMDGPU_GEM_DOMAIN_GTT) { |
5d240da9 EH |
134 | /* TTM GTT memory */ |
135 | system_mem_needed = acc_size + size; | |
136 | ttm_mem_needed = acc_size + size; | |
137 | } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) { | |
138 | /* Userptr */ | |
139 | system_mem_needed = acc_size + size; | |
140 | ttm_mem_needed = acc_size; | |
141 | } else { | |
142 | /* VRAM and SG */ | |
143 | system_mem_needed = acc_size; | |
144 | ttm_mem_needed = acc_size; | |
611736d8 FK |
145 | if (domain == AMDGPU_GEM_DOMAIN_VRAM) |
146 | vram_needed = size; | |
5d240da9 EH |
147 | } |
148 | ||
611736d8 FK |
149 | spin_lock(&kfd_mem_limit.mem_limit_lock); |
150 | ||
5d240da9 | 151 | if ((kfd_mem_limit.system_mem_used + system_mem_needed > |
611736d8 FK |
152 | kfd_mem_limit.max_system_mem_limit) || |
153 | (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > | |
154 | kfd_mem_limit.max_ttm_mem_limit) || | |
155 | (adev->kfd.vram_used + vram_needed > | |
156 | adev->gmc.real_vram_size - reserved_for_pt)) { | |
5d240da9 | 157 | ret = -ENOMEM; |
611736d8 | 158 | } else { |
5d240da9 EH |
159 | kfd_mem_limit.system_mem_used += system_mem_needed; |
160 | kfd_mem_limit.ttm_mem_used += ttm_mem_needed; | |
611736d8 | 161 | adev->kfd.vram_used += vram_needed; |
a46a2cd1 | 162 | } |
5d240da9 | 163 | |
a46a2cd1 FK |
164 | spin_unlock(&kfd_mem_limit.mem_limit_lock); |
165 | return ret; | |
166 | } | |
167 | ||
611736d8 | 168 | static void unreserve_mem_limit(struct amdgpu_device *adev, |
5d240da9 | 169 | uint64_t size, u32 domain, bool sg) |
a46a2cd1 FK |
170 | { |
171 | size_t acc_size; | |
172 | ||
173 | acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, | |
174 | sizeof(struct amdgpu_bo)); | |
175 | ||
176 | spin_lock(&kfd_mem_limit.mem_limit_lock); | |
5ae0283e | 177 | if (domain == AMDGPU_GEM_DOMAIN_GTT) { |
a46a2cd1 | 178 | kfd_mem_limit.system_mem_used -= (acc_size + size); |
5d240da9 EH |
179 | kfd_mem_limit.ttm_mem_used -= (acc_size + size); |
180 | } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) { | |
181 | kfd_mem_limit.system_mem_used -= (acc_size + size); | |
182 | kfd_mem_limit.ttm_mem_used -= acc_size; | |
183 | } else { | |
5ae0283e | 184 | kfd_mem_limit.system_mem_used -= acc_size; |
5d240da9 | 185 | kfd_mem_limit.ttm_mem_used -= acc_size; |
611736d8 FK |
186 | if (domain == AMDGPU_GEM_DOMAIN_VRAM) { |
187 | adev->kfd.vram_used -= size; | |
188 | WARN_ONCE(adev->kfd.vram_used < 0, | |
189 | "kfd VRAM memory accounting unbalanced"); | |
190 | } | |
5ae0283e | 191 | } |
a46a2cd1 FK |
192 | WARN_ONCE(kfd_mem_limit.system_mem_used < 0, |
193 | "kfd system memory accounting unbalanced"); | |
5d240da9 EH |
194 | WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, |
195 | "kfd TTM memory accounting unbalanced"); | |
a46a2cd1 FK |
196 | |
197 | spin_unlock(&kfd_mem_limit.mem_limit_lock); | |
198 | } | |
199 | ||
611736d8 | 200 | void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) |
a46a2cd1 | 201 | { |
611736d8 FK |
202 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); |
203 | u32 domain = bo->preferred_domains; | |
204 | bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU); | |
a46a2cd1 | 205 | |
5ae0283e | 206 | if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { |
611736d8 FK |
207 | domain = AMDGPU_GEM_DOMAIN_CPU; |
208 | sg = false; | |
a46a2cd1 | 209 | } |
a46a2cd1 | 210 | |
611736d8 | 211 | unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg); |
a46a2cd1 FK |
212 | } |
213 | ||
214 | ||
2d086fde | 215 | /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's |
a46a2cd1 FK |
216 | * reservation object. |
217 | * | |
218 | * @bo: [IN] Remove eviction fence(s) from this BO | |
2d086fde | 219 | * @ef: [IN] This eviction fence is removed if it |
a46a2cd1 | 220 | * is present in the shared list. |
a46a2cd1 | 221 | * |
a46a2cd1 FK |
 222 | * NOTE: Must be called with BO reserved i.e. bo->tbo.base.resv->lock held. |
223 | */ | |
224 | static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, | |
2d086fde | 225 | struct amdgpu_amdkfd_fence *ef) |
a46a2cd1 | 226 | { |
52791eee CK |
227 | struct dma_resv *resv = bo->tbo.base.resv; |
228 | struct dma_resv_list *old, *new; | |
e6f8d26e | 229 | unsigned int i, j, k; |
a46a2cd1 | 230 | |
2d086fde | 231 | if (!ef) |
a46a2cd1 FK |
232 | return -EINVAL; |
233 | ||
52791eee | 234 | old = dma_resv_get_list(resv); |
e6f8d26e | 235 | if (!old) |
a46a2cd1 FK |
236 | return 0; |
237 | ||
e6f8d26e CK |
238 | new = kmalloc(offsetof(typeof(*new), shared[old->shared_max]), |
239 | GFP_KERNEL); | |
240 | if (!new) | |
241 | return -ENOMEM; | |
a46a2cd1 | 242 | |
e6f8d26e CK |
 243 | /* Go through all the shared fences in the reservation object and sort |
244 | * the interesting ones to the end of the list. | |
a46a2cd1 | 245 | */ |
e6f8d26e | 246 | for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) { |
a46a2cd1 FK |
247 | struct dma_fence *f; |
248 | ||
e6f8d26e | 249 | f = rcu_dereference_protected(old->shared[i], |
52791eee | 250 | dma_resv_held(resv)); |
a46a2cd1 | 251 | |
2d086fde | 252 | if (f->context == ef->base.context) |
e6f8d26e CK |
253 | RCU_INIT_POINTER(new->shared[--j], f); |
254 | else | |
255 | RCU_INIT_POINTER(new->shared[k++], f); | |
a46a2cd1 | 256 | } |
e6f8d26e CK |
257 | new->shared_max = old->shared_max; |
258 | new->shared_count = k; | |
a46a2cd1 | 259 | |
e6f8d26e CK |
260 | /* Install the new fence list, seqcount provides the barriers */ |
261 | preempt_disable(); | |
262 | write_seqcount_begin(&resv->seq); | |
263 | RCU_INIT_POINTER(resv->fence, new); | |
a46a2cd1 FK |
264 | write_seqcount_end(&resv->seq); |
265 | preempt_enable(); | |
266 | ||
e6f8d26e CK |
 267 | /* Drop the references to the removed fences */ |
268 | for (i = j, k = 0; i < old->shared_count; ++i) { | |
269 | struct dma_fence *f; | |
270 | ||
271 | f = rcu_dereference_protected(new->shared[i], | |
52791eee | 272 | dma_resv_held(resv)); |
2d086fde | 273 | dma_fence_put(f); |
e6f8d26e CK |
274 | } |
275 | kfree_rcu(old, rcu); | |
a46a2cd1 FK |
276 | |
277 | return 0; | |
278 | } | |
279 | ||
f4a3c42b | 280 | int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) |
281 | { | |
282 | struct amdgpu_bo *root = bo; | |
283 | struct amdgpu_vm_bo_base *vm_bo; | |
284 | struct amdgpu_vm *vm; | |
285 | struct amdkfd_process_info *info; | |
286 | struct amdgpu_amdkfd_fence *ef; | |
287 | int ret; | |
288 | ||
 289 | /* We can always get vm_bo from the root PD BO. */ |
290 | while (root->parent) | |
291 | root = root->parent; | |
292 | ||
293 | vm_bo = root->vm_bo; | |
294 | if (!vm_bo) | |
295 | return 0; | |
296 | ||
297 | vm = vm_bo->vm; | |
298 | if (!vm) | |
299 | return 0; | |
300 | ||
301 | info = vm->process_info; | |
302 | if (!info || !info->eviction_fence) | |
303 | return 0; | |
304 | ||
305 | ef = container_of(dma_fence_get(&info->eviction_fence->base), | |
306 | struct amdgpu_amdkfd_fence, base); | |
307 | ||
308 | BUG_ON(!dma_resv_trylock(bo->tbo.base.resv)); | |
309 | ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef); | |
310 | dma_resv_unlock(bo->tbo.base.resv); | |
311 | ||
312 | dma_fence_put(&ef->base); | |
313 | return ret; | |
314 | } | |
315 | ||
a46a2cd1 FK |
316 | static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, |
317 | bool wait) | |
318 | { | |
319 | struct ttm_operation_ctx ctx = { false, false }; | |
320 | int ret; | |
321 | ||
322 | if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm), | |
323 | "Called with userptr BO")) | |
324 | return -EINVAL; | |
325 | ||
c704ab18 | 326 | amdgpu_bo_placement_from_domain(bo, domain); |
a46a2cd1 FK |
327 | |
328 | ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); | |
329 | if (ret) | |
330 | goto validate_fail; | |
2d086fde | 331 | if (wait) |
c60cd590 | 332 | amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false); |
a46a2cd1 FK |
333 | |
334 | validate_fail: | |
335 | return ret; | |
336 | } | |
337 | ||
338 | static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) | |
339 | { | |
340 | struct amdgpu_vm_parser *p = param; | |
341 | ||
342 | return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait); | |
343 | } | |
344 | ||
345 | /* vm_validate_pt_pd_bos - Validate page table and directory BOs | |
346 | * | |
347 | * Page directories are not updated here because huge page handling | |
348 | * during page table updates can invalidate page directory entries | |
349 | * again. Page directories are only updated after updating page | |
350 | * tables. | |
351 | */ | |
5b21d3e5 | 352 | static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) |
a46a2cd1 | 353 | { |
5b21d3e5 | 354 | struct amdgpu_bo *pd = vm->root.base.bo; |
a46a2cd1 FK |
355 | struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); |
356 | struct amdgpu_vm_parser param; | |
a46a2cd1 FK |
357 | int ret; |
358 | ||
359 | param.domain = AMDGPU_GEM_DOMAIN_VRAM; | |
360 | param.wait = false; | |
361 | ||
5b21d3e5 | 362 | ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate, |
a46a2cd1 FK |
363 | ¶m); |
364 | if (ret) { | |
365 | pr_err("amdgpu: failed to validate PT BOs\n"); | |
366 | return ret; | |
367 | } | |
368 | ||
369 | ret = amdgpu_amdkfd_validate(¶m, pd); | |
370 | if (ret) { | |
371 | pr_err("amdgpu: failed to validate PD\n"); | |
372 | return ret; | |
373 | } | |
374 | ||
11c3a249 | 375 | vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo); |
a46a2cd1 | 376 | |
5b21d3e5 | 377 | if (vm->use_cpu_for_update) { |
a46a2cd1 FK |
378 | ret = amdgpu_bo_kmap(pd, NULL); |
379 | if (ret) { | |
380 | pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret); | |
381 | return ret; | |
382 | } | |
383 | } | |
384 | ||
385 | return 0; | |
386 | } | |
387 | ||
a46a2cd1 FK |
388 | static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) |
389 | { | |
390 | struct amdgpu_bo *pd = vm->root.base.bo; | |
391 | struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); | |
392 | int ret; | |
393 | ||
807e2994 | 394 | ret = amdgpu_vm_update_pdes(adev, vm, false); |
a46a2cd1 FK |
395 | if (ret) |
396 | return ret; | |
397 | ||
e095fc17 | 398 | return amdgpu_sync_fence(sync, vm->last_update, false); |
a46a2cd1 FK |
399 | } |
400 | ||
d0ba51b1 FK |
401 | static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) |
402 | { | |
e0253d08 | 403 | struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); |
1d251d90 | 404 | bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT; |
d0ba51b1 FK |
405 | uint32_t mapping_flags; |
406 | ||
407 | mapping_flags = AMDGPU_VM_PAGE_READABLE; | |
1d251d90 | 408 | if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE) |
d0ba51b1 | 409 | mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; |
1d251d90 | 410 | if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE) |
d0ba51b1 FK |
411 | mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; |
412 | ||
e0253d08 FK |
413 | switch (adev->asic_type) { |
414 | case CHIP_ARCTURUS: | |
1d251d90 | 415 | if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { |
e0253d08 FK |
416 | if (bo_adev == adev) |
417 | mapping_flags |= coherent ? | |
418 | AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; | |
419 | else | |
420 | mapping_flags |= AMDGPU_VM_MTYPE_UC; | |
421 | } else { | |
422 | mapping_flags |= coherent ? | |
423 | AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; | |
424 | } | |
425 | break; | |
426 | default: | |
427 | mapping_flags |= coherent ? | |
428 | AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; | |
429 | } | |
d0ba51b1 | 430 | |
71776b6d | 431 | return amdgpu_gem_va_map_flags(adev, mapping_flags); |
d0ba51b1 FK |
432 | } |
433 | ||
a46a2cd1 FK |
434 | /* add_bo_to_vm - Add a BO to a VM |
435 | * | |
 436 | * Everything that needs to be done only once when a BO is first added |
437 | * to a VM. It can later be mapped and unmapped many times without | |
438 | * repeating these steps. | |
439 | * | |
440 | * 1. Allocate and initialize BO VA entry data structure | |
441 | * 2. Add BO to the VM | |
442 | * 3. Determine ASIC-specific PTE flags | |
443 | * 4. Alloc page tables and directories if needed | |
444 | * 4a. Validate new page tables and directories | |
445 | */ | |
446 | static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, | |
5b21d3e5 | 447 | struct amdgpu_vm *vm, bool is_aql, |
a46a2cd1 FK |
448 | struct kfd_bo_va_list **p_bo_va_entry) |
449 | { | |
450 | int ret; | |
451 | struct kfd_bo_va_list *bo_va_entry; | |
a46a2cd1 FK |
452 | struct amdgpu_bo *bo = mem->bo; |
453 | uint64_t va = mem->va; | |
454 | struct list_head *list_bo_va = &mem->bo_va_list; | |
455 | unsigned long bo_size = bo->tbo.mem.size; | |
456 | ||
457 | if (!va) { | |
458 | pr_err("Invalid VA when adding BO to VM\n"); | |
459 | return -EINVAL; | |
460 | } | |
461 | ||
462 | if (is_aql) | |
463 | va += bo_size; | |
464 | ||
465 | bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL); | |
466 | if (!bo_va_entry) | |
467 | return -ENOMEM; | |
468 | ||
469 | pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, | |
5b21d3e5 | 470 | va + bo_size, vm); |
a46a2cd1 FK |
471 | |
 472 | /* Add BO to VM internal data structures */ |
5b21d3e5 | 473 | bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo); |
a46a2cd1 FK |
474 | if (!bo_va_entry->bo_va) { |
475 | ret = -EINVAL; | |
476 | pr_err("Failed to add BO object to VM. ret == %d\n", | |
477 | ret); | |
478 | goto err_vmadd; | |
479 | } | |
480 | ||
481 | bo_va_entry->va = va; | |
d0ba51b1 | 482 | bo_va_entry->pte_flags = get_pte_flags(adev, mem); |
a46a2cd1 FK |
483 | bo_va_entry->kgd_dev = (void *)adev; |
484 | list_add(&bo_va_entry->bo_list, list_bo_va); | |
485 | ||
486 | if (p_bo_va_entry) | |
487 | *p_bo_va_entry = bo_va_entry; | |
488 | ||
0ce15d6f | 489 | /* Allocate and validate page tables if needed */
5b21d3e5 | 490 | ret = vm_validate_pt_pd_bos(vm); |
a46a2cd1 FK |
491 | if (ret) { |
492 | pr_err("validate_pt_pd_bos() failed\n"); | |
493 | goto err_alloc_pts; | |
494 | } | |
495 | ||
a46a2cd1 FK |
496 | return 0; |
497 | ||
498 | err_alloc_pts: | |
a46a2cd1 FK |
499 | amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va); |
500 | list_del(&bo_va_entry->bo_list); | |
501 | err_vmadd: | |
502 | kfree(bo_va_entry); | |
503 | return ret; | |
504 | } | |
505 | ||
506 | static void remove_bo_from_vm(struct amdgpu_device *adev, | |
507 | struct kfd_bo_va_list *entry, unsigned long size) | |
508 | { | |
509 | pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n", | |
510 | entry->va, | |
511 | entry->va + size, entry); | |
512 | amdgpu_vm_bo_rmv(adev, entry->bo_va); | |
513 | list_del(&entry->bo_list); | |
514 | kfree(entry); | |
515 | } | |
516 | ||
517 | static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, | |
5ae0283e FK |
518 | struct amdkfd_process_info *process_info, |
519 | bool userptr) | |
a46a2cd1 FK |
520 | { |
521 | struct ttm_validate_buffer *entry = &mem->validate_list; | |
522 | struct amdgpu_bo *bo = mem->bo; | |
523 | ||
524 | INIT_LIST_HEAD(&entry->head); | |
a9f34c70 | 525 | entry->num_shared = 1; |
a46a2cd1 FK |
526 | entry->bo = &bo->tbo; |
527 | mutex_lock(&process_info->lock); | |
5ae0283e FK |
528 | if (userptr) |
529 | list_add_tail(&entry->head, &process_info->userptr_valid_list); | |
530 | else | |
531 | list_add_tail(&entry->head, &process_info->kfd_bo_list); | |
a46a2cd1 FK |
532 | mutex_unlock(&process_info->lock); |
533 | } | |
534 | ||
71efab6a OZ |
535 | static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem, |
536 | struct amdkfd_process_info *process_info) | |
537 | { | |
538 | struct ttm_validate_buffer *bo_list_entry; | |
539 | ||
540 | bo_list_entry = &mem->validate_list; | |
541 | mutex_lock(&process_info->lock); | |
542 | list_del(&bo_list_entry->head); | |
543 | mutex_unlock(&process_info->lock); | |
544 | } | |
545 | ||
5ae0283e FK |
546 | /* Initializes user pages. It registers the MMU notifier and validates |
547 | * the userptr BO in the GTT domain. | |
548 | * | |
549 | * The BO must already be on the userptr_valid_list. Otherwise an | |
550 | * eviction and restore may happen that leaves the new BO unmapped | |
551 | * with the user mode queues running. | |
552 | * | |
553 | * Takes the process_info->lock to protect against concurrent restore | |
554 | * workers. | |
555 | * | |
556 | * Returns 0 for success, negative errno for errors. | |
557 | */ | |
62914a99 | 558 | static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr) |
5ae0283e FK |
559 | { |
560 | struct amdkfd_process_info *process_info = mem->process_info; | |
561 | struct amdgpu_bo *bo = mem->bo; | |
562 | struct ttm_operation_ctx ctx = { true, false }; | |
563 | int ret = 0; | |
564 | ||
565 | mutex_lock(&process_info->lock); | |
566 | ||
567 | ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0); | |
568 | if (ret) { | |
569 | pr_err("%s: Failed to set userptr: %d\n", __func__, ret); | |
570 | goto out; | |
571 | } | |
572 | ||
573 | ret = amdgpu_mn_register(bo, user_addr); | |
574 | if (ret) { | |
575 | pr_err("%s: Failed to register MMU notifier: %d\n", | |
576 | __func__, ret); | |
577 | goto out; | |
578 | } | |
579 | ||
e5eaa7cc | 580 | ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); |
5ae0283e FK |
581 | if (ret) { |
582 | pr_err("%s: Failed to get user pages: %d\n", __func__, ret); | |
899fbde1 | 583 | goto unregister_out; |
5ae0283e FK |
584 | } |
585 | ||
5ae0283e FK |
586 | ret = amdgpu_bo_reserve(bo, true); |
587 | if (ret) { | |
588 | pr_err("%s: Failed to reserve BO\n", __func__); | |
589 | goto release_out; | |
590 | } | |
c704ab18 | 591 | amdgpu_bo_placement_from_domain(bo, mem->domain); |
5ae0283e FK |
592 | ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); |
593 | if (ret) | |
594 | pr_err("%s: failed to validate BO\n", __func__); | |
595 | amdgpu_bo_unreserve(bo); | |
596 | ||
597 | release_out: | |
899fbde1 | 598 | amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); |
5ae0283e FK |
599 | unregister_out: |
600 | if (ret) | |
601 | amdgpu_mn_unregister(bo); | |
602 | out: | |
603 | mutex_unlock(&process_info->lock); | |
604 | return ret; | |
605 | } | |
606 | ||
a46a2cd1 FK |
607 | /* Reserving a BO and its page table BOs must happen atomically to |
608 | * avoid deadlocks. Some operations update multiple VMs at once. Track | |
609 | * all the reservation info in a context structure. Optionally a sync | |
610 | * object can track VM updates. | |
611 | */ | |
612 | struct bo_vm_reservation_context { | |
613 | struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */ | |
614 | unsigned int n_vms; /* Number of VMs reserved */ | |
615 | struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */ | |
616 | struct ww_acquire_ctx ticket; /* Reservation ticket */ | |
617 | struct list_head list, duplicates; /* BO lists */ | |
618 | struct amdgpu_sync *sync; /* Pointer to sync object */ | |
619 | bool reserved; /* Whether BOs are reserved */ | |
620 | }; | |
621 | ||
622 | enum bo_vm_match { | |
623 | BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */ | |
624 | BO_VM_MAPPED, /* Match VMs where a BO is mapped */ | |
625 | BO_VM_ALL, /* Match all VMs a BO was added to */ | |
626 | }; | |
627 | ||
628 | /** | |
629 | * reserve_bo_and_vm - reserve a BO and a VM unconditionally. | |
630 | * @mem: KFD BO structure. | |
631 | * @vm: the VM to reserve. | |
632 | * @ctx: the struct that will be used in unreserve_bo_and_vms(). | |
633 | */ | |
634 | static int reserve_bo_and_vm(struct kgd_mem *mem, | |
635 | struct amdgpu_vm *vm, | |
636 | struct bo_vm_reservation_context *ctx) | |
637 | { | |
638 | struct amdgpu_bo *bo = mem->bo; | |
639 | int ret; | |
640 | ||
641 | WARN_ON(!vm); | |
642 | ||
643 | ctx->reserved = false; | |
644 | ctx->n_vms = 1; | |
645 | ctx->sync = &mem->sync; | |
646 | ||
647 | INIT_LIST_HEAD(&ctx->list); | |
648 | INIT_LIST_HEAD(&ctx->duplicates); | |
649 | ||
650 | ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL); | |
651 | if (!ctx->vm_pd) | |
652 | return -ENOMEM; | |
653 | ||
a46a2cd1 FK |
654 | ctx->kfd_bo.priority = 0; |
655 | ctx->kfd_bo.tv.bo = &bo->tbo; | |
a9f34c70 | 656 | ctx->kfd_bo.tv.num_shared = 1; |
a46a2cd1 FK |
657 | list_add(&ctx->kfd_bo.tv.head, &ctx->list); |
658 | ||
659 | amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); | |
660 | ||
661 | ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, | |
9165fb87 | 662 | false, &ctx->duplicates); |
a46a2cd1 FK |
663 | if (!ret) |
664 | ctx->reserved = true; | |
665 | else { | |
666 | pr_err("Failed to reserve buffers in ttm\n"); | |
667 | kfree(ctx->vm_pd); | |
668 | ctx->vm_pd = NULL; | |
669 | } | |
670 | ||
671 | return ret; | |
672 | } | |
673 | ||
674 | /** | |
675 | * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally | |
676 | * @mem: KFD BO structure. | |
677 | * @vm: the VM to reserve. If NULL, then all VMs associated with the BO | |
 678 | * are used. Otherwise, a single VM associated with the BO. |
679 | * @map_type: the mapping status that will be used to filter the VMs. | |
680 | * @ctx: the struct that will be used in unreserve_bo_and_vms(). | |
681 | * | |
682 | * Returns 0 for success, negative for failure. | |
683 | */ | |
684 | static int reserve_bo_and_cond_vms(struct kgd_mem *mem, | |
685 | struct amdgpu_vm *vm, enum bo_vm_match map_type, | |
686 | struct bo_vm_reservation_context *ctx) | |
687 | { | |
688 | struct amdgpu_bo *bo = mem->bo; | |
689 | struct kfd_bo_va_list *entry; | |
690 | unsigned int i; | |
691 | int ret; | |
692 | ||
693 | ctx->reserved = false; | |
694 | ctx->n_vms = 0; | |
695 | ctx->vm_pd = NULL; | |
696 | ctx->sync = &mem->sync; | |
697 | ||
698 | INIT_LIST_HEAD(&ctx->list); | |
699 | INIT_LIST_HEAD(&ctx->duplicates); | |
700 | ||
701 | list_for_each_entry(entry, &mem->bo_va_list, bo_list) { | |
702 | if ((vm && vm != entry->bo_va->base.vm) || | |
703 | (entry->is_mapped != map_type | |
704 | && map_type != BO_VM_ALL)) | |
705 | continue; | |
706 | ||
707 | ctx->n_vms++; | |
708 | } | |
709 | ||
710 | if (ctx->n_vms != 0) { | |
711 | ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), | |
712 | GFP_KERNEL); | |
713 | if (!ctx->vm_pd) | |
714 | return -ENOMEM; | |
715 | } | |
716 | ||
a46a2cd1 FK |
717 | ctx->kfd_bo.priority = 0; |
718 | ctx->kfd_bo.tv.bo = &bo->tbo; | |
a9f34c70 | 719 | ctx->kfd_bo.tv.num_shared = 1; |
a46a2cd1 FK |
720 | list_add(&ctx->kfd_bo.tv.head, &ctx->list); |
721 | ||
722 | i = 0; | |
723 | list_for_each_entry(entry, &mem->bo_va_list, bo_list) { | |
724 | if ((vm && vm != entry->bo_va->base.vm) || | |
725 | (entry->is_mapped != map_type | |
726 | && map_type != BO_VM_ALL)) | |
727 | continue; | |
728 | ||
729 | amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list, | |
730 | &ctx->vm_pd[i]); | |
731 | i++; | |
732 | } | |
733 | ||
734 | ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, | |
9165fb87 | 735 | false, &ctx->duplicates); |
a46a2cd1 FK |
736 | if (!ret) |
737 | ctx->reserved = true; | |
738 | else | |
739 | pr_err("Failed to reserve buffers in ttm.\n"); | |
740 | ||
741 | if (ret) { | |
742 | kfree(ctx->vm_pd); | |
743 | ctx->vm_pd = NULL; | |
744 | } | |
745 | ||
746 | return ret; | |
747 | } | |
748 | ||
749 | /** | |
750 | * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context | |
751 | * @ctx: Reservation context to unreserve | |
752 | * @wait: Optionally wait for a sync object representing pending VM updates | |
753 | * @intr: Whether the wait is interruptible | |
754 | * | |
755 | * Also frees any resources allocated in | |
756 | * reserve_bo_and_(cond_)vm(s). Returns the status from | |
757 | * amdgpu_sync_wait. | |
758 | */ | |
759 | static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, | |
760 | bool wait, bool intr) | |
761 | { | |
762 | int ret = 0; | |
763 | ||
764 | if (wait) | |
765 | ret = amdgpu_sync_wait(ctx->sync, intr); | |
766 | ||
767 | if (ctx->reserved) | |
768 | ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list); | |
769 | kfree(ctx->vm_pd); | |
770 | ||
771 | ctx->sync = NULL; | |
772 | ||
773 | ctx->reserved = false; | |
774 | ctx->vm_pd = NULL; | |
775 | ||
776 | return ret; | |
777 | } | |
778 | ||
779 | static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, | |
780 | struct kfd_bo_va_list *entry, | |
781 | struct amdgpu_sync *sync) | |
782 | { | |
783 | struct amdgpu_bo_va *bo_va = entry->bo_va; | |
784 | struct amdgpu_vm *vm = bo_va->base.vm; | |
a46a2cd1 | 785 | |
a46a2cd1 FK |
786 | amdgpu_vm_bo_unmap(adev, bo_va, entry->va); |
787 | ||
788 | amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); | |
789 | ||
e095fc17 | 790 | amdgpu_sync_fence(sync, bo_va->last_pt_update, false); |
a46a2cd1 FK |
791 | |
792 | return 0; | |
793 | } | |
794 | ||
795 | static int update_gpuvm_pte(struct amdgpu_device *adev, | |
796 | struct kfd_bo_va_list *entry, | |
797 | struct amdgpu_sync *sync) | |
798 | { | |
799 | int ret; | |
1e608013 | 800 | struct amdgpu_bo_va *bo_va = entry->bo_va; |
a46a2cd1 FK |
801 | |
802 | /* Update the page tables */ | |
803 | ret = amdgpu_vm_bo_update(adev, bo_va, false); | |
804 | if (ret) { | |
805 | pr_err("amdgpu_vm_bo_update failed\n"); | |
806 | return ret; | |
807 | } | |
808 | ||
e095fc17 | 809 | return amdgpu_sync_fence(sync, bo_va->last_pt_update, false); |
a46a2cd1 FK |
810 | } |
811 | ||
812 | static int map_bo_to_gpuvm(struct amdgpu_device *adev, | |
5ae0283e FK |
813 | struct kfd_bo_va_list *entry, struct amdgpu_sync *sync, |
814 | bool no_update_pte) | |
a46a2cd1 FK |
815 | { |
816 | int ret; | |
817 | ||
818 | /* Set virtual address for the allocation */ | |
819 | ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0, | |
820 | amdgpu_bo_size(entry->bo_va->base.bo), | |
821 | entry->pte_flags); | |
822 | if (ret) { | |
823 | pr_err("Failed to map VA 0x%llx in vm. ret %d\n", | |
824 | entry->va, ret); | |
825 | return ret; | |
826 | } | |
827 | ||
5ae0283e FK |
828 | if (no_update_pte) |
829 | return 0; | |
830 | ||
a46a2cd1 FK |
831 | ret = update_gpuvm_pte(adev, entry, sync); |
832 | if (ret) { | |
833 | pr_err("update_gpuvm_pte() failed\n"); | |
834 | goto update_gpuvm_pte_failed; | |
835 | } | |
836 | ||
837 | return 0; | |
838 | ||
839 | update_gpuvm_pte_failed: | |
840 | unmap_bo_from_gpuvm(adev, entry, sync); | |
841 | return ret; | |
842 | } | |
843 | ||
b408a548 FK |
844 | static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size) |
845 | { | |
846 | struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL); | |
847 | ||
848 | if (!sg) | |
849 | return NULL; | |
850 | if (sg_alloc_table(sg, 1, GFP_KERNEL)) { | |
851 | kfree(sg); | |
852 | return NULL; | |
853 | } | |
854 | sg->sgl->dma_address = addr; | |
855 | sg->sgl->length = size; | |
856 | #ifdef CONFIG_NEED_SG_DMA_LENGTH | |
857 | sg->sgl->dma_length = size; | |
858 | #endif | |
859 | return sg; | |
860 | } | |
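/* The single-entry sg table built above carries the doorbell or MMIO bus address
 * directly in dma_address, so BOs allocated with KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL or
 * KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP point at that aperture rather than at system pages.
 */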
861 | ||
a46a2cd1 FK |
862 | static int process_validate_vms(struct amdkfd_process_info *process_info) |
863 | { | |
5b21d3e5 | 864 | struct amdgpu_vm *peer_vm; |
a46a2cd1 FK |
865 | int ret; |
866 | ||
867 | list_for_each_entry(peer_vm, &process_info->vm_list_head, | |
868 | vm_list_node) { | |
869 | ret = vm_validate_pt_pd_bos(peer_vm); | |
870 | if (ret) | |
871 | return ret; | |
872 | } | |
873 | ||
874 | return 0; | |
875 | } | |
876 | ||
9130cc01 HK |
877 | static int process_sync_pds_resv(struct amdkfd_process_info *process_info, |
878 | struct amdgpu_sync *sync) | |
879 | { | |
880 | struct amdgpu_vm *peer_vm; | |
881 | int ret; | |
882 | ||
883 | list_for_each_entry(peer_vm, &process_info->vm_list_head, | |
884 | vm_list_node) { | |
885 | struct amdgpu_bo *pd = peer_vm->root.base.bo; | |
886 | ||
5d319660 CK |
887 | ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv, |
888 | AMDGPU_SYNC_NE_OWNER, | |
889 | AMDGPU_FENCE_OWNER_KFD); | |
9130cc01 HK |
890 | if (ret) |
891 | return ret; | |
892 | } | |
893 | ||
894 | return 0; | |
895 | } | |
896 | ||
a46a2cd1 FK |
897 | static int process_update_pds(struct amdkfd_process_info *process_info, |
898 | struct amdgpu_sync *sync) | |
899 | { | |
5b21d3e5 | 900 | struct amdgpu_vm *peer_vm; |
a46a2cd1 FK |
901 | int ret; |
902 | ||
903 | list_for_each_entry(peer_vm, &process_info->vm_list_head, | |
904 | vm_list_node) { | |
5b21d3e5 | 905 | ret = vm_update_pds(peer_vm, sync); |
a46a2cd1 FK |
906 | if (ret) |
907 | return ret; | |
908 | } | |
909 | ||
910 | return 0; | |
911 | } | |
912 | ||
ede0dd86 FK |
913 | static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, |
914 | struct dma_fence **ef) | |
a46a2cd1 | 915 | { |
3486625b | 916 | struct amdkfd_process_info *info = NULL; |
ede0dd86 | 917 | int ret; |
a46a2cd1 FK |
918 | |
919 | if (!*process_info) { | |
920 | info = kzalloc(sizeof(*info), GFP_KERNEL); | |
ede0dd86 FK |
921 | if (!info) |
922 | return -ENOMEM; | |
a46a2cd1 FK |
923 | |
924 | mutex_init(&info->lock); | |
925 | INIT_LIST_HEAD(&info->vm_list_head); | |
926 | INIT_LIST_HEAD(&info->kfd_bo_list); | |
5ae0283e FK |
927 | INIT_LIST_HEAD(&info->userptr_valid_list); |
928 | INIT_LIST_HEAD(&info->userptr_inval_list); | |
a46a2cd1 FK |
929 | |
930 | info->eviction_fence = | |
931 | amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), | |
932 | current->mm); | |
933 | if (!info->eviction_fence) { | |
934 | pr_err("Failed to create eviction fence\n"); | |
ede0dd86 | 935 | ret = -ENOMEM; |
a46a2cd1 FK |
936 | goto create_evict_fence_fail; |
937 | } | |
938 | ||
5ae0283e FK |
939 | info->pid = get_task_pid(current->group_leader, PIDTYPE_PID); |
940 | atomic_set(&info->evicted_bos, 0); | |
941 | INIT_DELAYED_WORK(&info->restore_userptr_work, | |
942 | amdgpu_amdkfd_restore_userptr_worker); | |
943 | ||
a46a2cd1 FK |
944 | *process_info = info; |
945 | *ef = dma_fence_get(&info->eviction_fence->base); | |
946 | } | |
947 | ||
ede0dd86 | 948 | vm->process_info = *process_info; |
a46a2cd1 | 949 | |
3486625b | 950 | /* Validate page directory and attach eviction fence */ |
ede0dd86 | 951 | ret = amdgpu_bo_reserve(vm->root.base.bo, true); |
3486625b FK |
952 | if (ret) |
953 | goto reserve_pd_fail; | |
ede0dd86 | 954 | ret = vm_validate_pt_pd_bos(vm); |
3486625b FK |
955 | if (ret) { |
956 | pr_err("validate_pt_pd_bos() failed\n"); | |
957 | goto validate_pd_fail; | |
958 | } | |
d38ca8f0 CIK |
959 | ret = amdgpu_bo_sync_wait(vm->root.base.bo, |
960 | AMDGPU_FENCE_OWNER_KFD, false); | |
3486625b FK |
961 | if (ret) |
962 | goto wait_pd_fail; | |
52791eee | 963 | ret = dma_resv_reserve_shared(vm->root.base.bo->tbo.base.resv, 1); |
dd68722c FK |
964 | if (ret) |
965 | goto reserve_shared_fail; | |
ede0dd86 FK |
966 | amdgpu_bo_fence(vm->root.base.bo, |
967 | &vm->process_info->eviction_fence->base, true); | |
968 | amdgpu_bo_unreserve(vm->root.base.bo); | |
3486625b FK |
969 | |
970 | /* Update process info */ | |
ede0dd86 FK |
971 | mutex_lock(&vm->process_info->lock); |
972 | list_add_tail(&vm->vm_list_node, | |
973 | &(vm->process_info->vm_list_head)); | |
974 | vm->process_info->n_vms++; | |
975 | mutex_unlock(&vm->process_info->lock); | |
a46a2cd1 | 976 | |
ede0dd86 | 977 | return 0; |
a46a2cd1 | 978 | |
dd68722c | 979 | reserve_shared_fail: |
3486625b FK |
980 | wait_pd_fail: |
981 | validate_pd_fail: | |
ede0dd86 | 982 | amdgpu_bo_unreserve(vm->root.base.bo); |
3486625b | 983 | reserve_pd_fail: |
ede0dd86 FK |
984 | vm->process_info = NULL; |
985 | if (info) { | |
986 | /* Two fence references: one in info and one in *ef */ | |
987 | dma_fence_put(&info->eviction_fence->base); | |
988 | dma_fence_put(*ef); | |
989 | *ef = NULL; | |
990 | *process_info = NULL; | |
5ae0283e | 991 | put_pid(info->pid); |
a46a2cd1 | 992 | create_evict_fence_fail: |
ede0dd86 FK |
993 | mutex_destroy(&info->lock); |
994 | kfree(info); | |
995 | } | |
996 | return ret; | |
997 | } | |
998 | ||
1685b01a OZ |
999 | int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid, |
1000 | void **vm, void **process_info, | |
ede0dd86 FK |
1001 | struct dma_fence **ef) |
1002 | { | |
1003 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
1004 | struct amdgpu_vm *new_vm; | |
1005 | int ret; | |
1006 | ||
1007 | new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL); | |
1008 | if (!new_vm) | |
1009 | return -ENOMEM; | |
1010 | ||
1011 | /* Initialize AMDGPU part of the VM */ | |
1685b01a | 1012 | ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid); |
ede0dd86 FK |
1013 | if (ret) { |
1014 | pr_err("Failed init vm ret %d\n", ret); | |
1015 | goto amdgpu_vm_init_fail; | |
1016 | } | |
1017 | ||
1018 | /* Initialize KFD part of the VM and process info */ | |
1019 | ret = init_kfd_vm(new_vm, process_info, ef); | |
1020 | if (ret) | |
1021 | goto init_kfd_vm_fail; | |
1022 | ||
1023 | *vm = (void *) new_vm; | |
1024 | ||
1025 | return 0; | |
1026 | ||
1027 | init_kfd_vm_fail: | |
5b21d3e5 | 1028 | amdgpu_vm_fini(adev, new_vm); |
ede0dd86 | 1029 | amdgpu_vm_init_fail: |
a46a2cd1 FK |
1030 | kfree(new_vm); |
1031 | return ret; | |
a46a2cd1 FK |
1032 | } |
1033 | ||
ede0dd86 | 1034 | int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, |
1685b01a | 1035 | struct file *filp, unsigned int pasid, |
ede0dd86 FK |
1036 | void **vm, void **process_info, |
1037 | struct dma_fence **ef) | |
a46a2cd1 FK |
1038 | { |
1039 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
ede0dd86 FK |
1040 | struct drm_file *drm_priv = filp->private_data; |
1041 | struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv; | |
1042 | struct amdgpu_vm *avm = &drv_priv->vm; | |
1043 | int ret; | |
a46a2cd1 | 1044 | |
ede0dd86 FK |
1045 | /* Already a compute VM? */ |
1046 | if (avm->process_info) | |
1047 | return -EINVAL; | |
1048 | ||
1049 | /* Convert VM into a compute VM */ | |
1685b01a | 1050 | ret = amdgpu_vm_make_compute(adev, avm, pasid); |
ede0dd86 FK |
1051 | if (ret) |
1052 | return ret; | |
1053 | ||
1054 | /* Initialize KFD part of the VM and process info */ | |
1055 | ret = init_kfd_vm(avm, process_info, ef); | |
1056 | if (ret) | |
1057 | return ret; | |
1058 | ||
1059 | *vm = (void *)avm; | |
1060 | ||
1061 | return 0; | |
1062 | } | |
1063 | ||
1064 | void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, | |
1065 | struct amdgpu_vm *vm) | |
1066 | { | |
1067 | struct amdkfd_process_info *process_info = vm->process_info; | |
1068 | struct amdgpu_bo *pd = vm->root.base.bo; | |
1069 | ||
1070 | if (!process_info) | |
a46a2cd1 FK |
1071 | return; |
1072 | ||
a46a2cd1 | 1073 | /* Release eviction fence from PD */ |
a46a2cd1 FK |
1074 | amdgpu_bo_reserve(pd, false); |
1075 | amdgpu_bo_fence(pd, NULL, false); | |
1076 | amdgpu_bo_unreserve(pd); | |
1077 | ||
ede0dd86 | 1078 | /* Update process info */ |
a46a2cd1 FK |
1079 | mutex_lock(&process_info->lock); |
1080 | process_info->n_vms--; | |
ede0dd86 | 1081 | list_del(&vm->vm_list_node); |
a46a2cd1 FK |
1082 | mutex_unlock(&process_info->lock); |
1083 | ||
f4a3c42b | 1084 | vm->process_info = NULL; |
1085 | ||
ede0dd86 | 1086 | /* Release per-process resources when last compute VM is destroyed */ |
a46a2cd1 FK |
1087 | if (!process_info->n_vms) { |
1088 | WARN_ON(!list_empty(&process_info->kfd_bo_list)); | |
5ae0283e FK |
1089 | WARN_ON(!list_empty(&process_info->userptr_valid_list)); |
1090 | WARN_ON(!list_empty(&process_info->userptr_inval_list)); | |
a46a2cd1 FK |
1091 | |
1092 | dma_fence_put(&process_info->eviction_fence->base); | |
5ae0283e FK |
1093 | cancel_delayed_work_sync(&process_info->restore_userptr_work); |
1094 | put_pid(process_info->pid); | |
a46a2cd1 FK |
1095 | mutex_destroy(&process_info->lock); |
1096 | kfree(process_info); | |
1097 | } | |
ede0dd86 FK |
1098 | } |
1099 | ||
1100 | void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) | |
1101 | { | |
1102 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
1103 | struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; | |
1104 | ||
1105 | if (WARN_ON(!kgd || !vm)) | |
1106 | return; | |
1107 | ||
1108 | pr_debug("Destroying process vm %p\n", vm); | |
a46a2cd1 FK |
1109 | |
1110 | /* Release the VM context */ | |
1111 | amdgpu_vm_fini(adev, avm); | |
1112 | kfree(vm); | |
1113 | } | |
1114 | ||
bf47afba OZ |
1115 | void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm) |
1116 | { | |
1117 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
1118 | struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; | |
1119 | ||
1120 | if (WARN_ON(!kgd || !vm)) | |
1121 | return; | |
1122 | ||
1123 | pr_debug("Releasing process vm %p\n", vm); | |
1124 | ||
 1125 | /* The original pasid of the amdgpu vm has already been |
 1126 | * released while converting the amdgpu vm into a compute vm. |
 1127 | * The current pasid is managed by kfd and will be |
 1128 | * released on kfd process destroy. Set the amdgpu pasid |
 1129 | * to 0 to avoid a duplicate release. |
1130 | */ | |
1131 | amdgpu_vm_release_compute(adev, avm); | |
1132 | } | |
1133 | ||
e715c6d0 | 1134 | uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) |
a46a2cd1 | 1135 | { |
5b21d3e5 | 1136 | struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; |
e715c6d0 SL |
1137 | struct amdgpu_bo *pd = avm->root.base.bo; |
1138 | struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); | |
a46a2cd1 | 1139 | |
e715c6d0 SL |
1140 | if (adev->asic_type < CHIP_VEGA10) |
1141 | return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; | |
1142 | return avm->pd_phys_addr; | |
a46a2cd1 FK |
1143 | } |
1144 | ||
1145 | int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( | |
1146 | struct kgd_dev *kgd, uint64_t va, uint64_t size, | |
1147 | void *vm, struct kgd_mem **mem, | |
1148 | uint64_t *offset, uint32_t flags) | |
1149 | { | |
1150 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
5b21d3e5 | 1151 | struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; |
b408a548 FK |
1152 | enum ttm_bo_type bo_type = ttm_bo_type_device; |
1153 | struct sg_table *sg = NULL; | |
5ae0283e | 1154 | uint64_t user_addr = 0; |
a46a2cd1 | 1155 | struct amdgpu_bo *bo; |
3216c6b7 | 1156 | struct amdgpu_bo_param bp; |
5ae0283e | 1157 | u32 domain, alloc_domain; |
a46a2cd1 | 1158 | u64 alloc_flags; |
a46a2cd1 FK |
1159 | int ret; |
1160 | ||
1161 | /* | |
1162 | * Check on which domain to allocate BO | |
1163 | */ | |
1d251d90 | 1164 | if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { |
5ae0283e | 1165 | domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; |
6856e4b6 | 1166 | alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; |
1d251d90 | 1167 | alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ? |
a46a2cd1 FK |
1168 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : |
1169 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS; | |
1d251d90 | 1170 | } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { |
5ae0283e FK |
1171 | domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; |
1172 | alloc_flags = 0; | |
1d251d90 | 1173 | } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { |
5ae0283e FK |
1174 | domain = AMDGPU_GEM_DOMAIN_GTT; |
1175 | alloc_domain = AMDGPU_GEM_DOMAIN_CPU; | |
a46a2cd1 | 1176 | alloc_flags = 0; |
5ae0283e FK |
1177 | if (!offset || !*offset) |
1178 | return -EINVAL; | |
35f3fc87 | 1179 | user_addr = untagged_addr(*offset); |
1d251d90 YZ |
1180 | } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | |
1181 | KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { | |
b408a548 FK |
1182 | domain = AMDGPU_GEM_DOMAIN_GTT; |
1183 | alloc_domain = AMDGPU_GEM_DOMAIN_CPU; | |
1184 | bo_type = ttm_bo_type_sg; | |
1185 | alloc_flags = 0; | |
1186 | if (size > UINT_MAX) | |
1187 | return -EINVAL; | |
1188 | sg = create_doorbell_sg(*offset, size); | |
1189 | if (!sg) | |
1190 | return -ENOMEM; | |
a46a2cd1 FK |
1191 | } else { |
1192 | return -EINVAL; | |
1193 | } | |
1194 | ||
1195 | *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); | |
b408a548 FK |
1196 | if (!*mem) { |
1197 | ret = -ENOMEM; | |
1198 | goto err; | |
1199 | } | |
a46a2cd1 FK |
1200 | INIT_LIST_HEAD(&(*mem)->bo_va_list); |
1201 | mutex_init(&(*mem)->lock); | |
1d251d90 | 1202 | (*mem)->aql_queue = !!(flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); |
a46a2cd1 FK |
1203 | |
1204 | /* Workaround for AQL queue wraparound bug. Map the same | |
1205 | * memory twice. That means we only actually allocate half | |
1206 | * the memory. | |
1207 | */ | |
1208 | if ((*mem)->aql_queue) | |
1209 | size = size >> 1; | |
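/* The second (AQL) mapping at va + size is created later, when add_bo_to_vm() is
 * called with is_aql = true and offsets the VA by the BO size.
 */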
1210 | ||
d0ba51b1 | 1211 | (*mem)->alloc_flags = flags; |
a46a2cd1 FK |
1212 | |
1213 | amdgpu_sync_create(&(*mem)->sync); | |
1214 | ||
b408a548 | 1215 | ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg); |
a46a2cd1 FK |
1216 | if (ret) { |
1217 | pr_debug("Insufficient system memory\n"); | |
5d240da9 | 1218 | goto err_reserve_limit; |
a46a2cd1 FK |
1219 | } |
1220 | ||
1221 | pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", | |
1222 | va, size, domain_string(alloc_domain)); | |
1223 | ||
3216c6b7 CZ |
1224 | memset(&bp, 0, sizeof(bp)); |
1225 | bp.size = size; | |
17da41bf | 1226 | bp.byte_align = 1; |
3216c6b7 CZ |
1227 | bp.domain = alloc_domain; |
1228 | bp.flags = alloc_flags; | |
b408a548 | 1229 | bp.type = bo_type; |
3216c6b7 CZ |
1230 | bp.resv = NULL; |
1231 | ret = amdgpu_bo_create(adev, &bp, &bo); | |
a46a2cd1 FK |
1232 | if (ret) { |
1233 | pr_debug("Failed to create BO on domain %s. ret %d\n", | |
1234 | domain_string(alloc_domain), ret); | |
1235 | goto err_bo_create; | |
1236 | } | |
b408a548 FK |
1237 | if (bo_type == ttm_bo_type_sg) { |
1238 | bo->tbo.sg = sg; | |
1239 | bo->tbo.ttm->sg = sg; | |
1240 | } | |
a46a2cd1 FK |
1241 | bo->kfd_bo = *mem; |
1242 | (*mem)->bo = bo; | |
5ae0283e FK |
1243 | if (user_addr) |
1244 | bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; | |
a46a2cd1 FK |
1245 | |
1246 | (*mem)->va = va; | |
5ae0283e | 1247 | (*mem)->domain = domain; |
a46a2cd1 | 1248 | (*mem)->mapped_to_gpu_memory = 0; |
5b21d3e5 | 1249 | (*mem)->process_info = avm->process_info; |
5ae0283e FK |
1250 | add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); |
1251 | ||
1252 | if (user_addr) { | |
62914a99 | 1253 | ret = init_user_pages(*mem, user_addr); |
71efab6a | 1254 | if (ret) |
5ae0283e | 1255 | goto allocate_init_user_pages_failed; |
5ae0283e | 1256 | } |
a46a2cd1 FK |
1257 | |
1258 | if (offset) | |
1259 | *offset = amdgpu_bo_mmap_offset(bo); | |
1260 | ||
1261 | return 0; | |
1262 | ||
5ae0283e | 1263 | allocate_init_user_pages_failed: |
71efab6a | 1264 | remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); |
5ae0283e FK |
1265 | amdgpu_bo_unref(&bo); |
1266 | /* Don't unreserve system mem limit twice */ | |
5d240da9 | 1267 | goto err_reserve_limit; |
a46a2cd1 | 1268 | err_bo_create: |
b408a548 | 1269 | unreserve_mem_limit(adev, size, alloc_domain, !!sg); |
5d240da9 | 1270 | err_reserve_limit: |
a46a2cd1 FK |
1271 | mutex_destroy(&(*mem)->lock); |
1272 | kfree(*mem); | |
b408a548 FK |
1273 | err: |
1274 | if (sg) { | |
1275 | sg_free_table(sg); | |
1276 | kfree(sg); | |
1277 | } | |
a46a2cd1 FK |
1278 | return ret; |
1279 | } | |
1280 | ||
1281 | int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( | |
1282 | struct kgd_dev *kgd, struct kgd_mem *mem) | |
1283 | { | |
1284 | struct amdkfd_process_info *process_info = mem->process_info; | |
1285 | unsigned long bo_size = mem->bo->tbo.mem.size; | |
1286 | struct kfd_bo_va_list *entry, *tmp; | |
1287 | struct bo_vm_reservation_context ctx; | |
1288 | struct ttm_validate_buffer *bo_list_entry; | |
1289 | int ret; | |
1290 | ||
1291 | mutex_lock(&mem->lock); | |
1292 | ||
1293 | if (mem->mapped_to_gpu_memory > 0) { | |
1294 | pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n", | |
1295 | mem->va, bo_size); | |
1296 | mutex_unlock(&mem->lock); | |
1297 | return -EBUSY; | |
1298 | } | |
1299 | ||
1300 | mutex_unlock(&mem->lock); | |
1301 | /* lock is not needed after this, since mem is unused and will | |
1302 | * be freed anyway | |
1303 | */ | |
1304 | ||
5ae0283e FK |
1305 | /* No more MMU notifiers */ |
1306 | amdgpu_mn_unregister(mem->bo); | |
1307 | ||
a46a2cd1 FK |
1308 | /* Make sure restore workers don't access the BO any more */ |
1309 | bo_list_entry = &mem->validate_list; | |
1310 | mutex_lock(&process_info->lock); | |
1311 | list_del(&bo_list_entry->head); | |
1312 | mutex_unlock(&process_info->lock); | |
1313 | ||
1314 | ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); | |
1315 | if (unlikely(ret)) | |
1316 | return ret; | |
1317 | ||
1318 | /* The eviction fence should be removed by the last unmap. | |
1319 | * TODO: Log an error condition if the bo still has the eviction fence | |
1320 | * attached | |
1321 | */ | |
1322 | amdgpu_amdkfd_remove_eviction_fence(mem->bo, | |
2d086fde | 1323 | process_info->eviction_fence); |
a46a2cd1 FK |
1324 | pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va, |
1325 | mem->va + bo_size * (1 + mem->aql_queue)); | |
1326 | ||
1327 | /* Remove from VM internal data structures */ | |
1328 | list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list) | |
1329 | remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev, | |
1330 | entry, bo_size); | |
1331 | ||
1332 | ret = unreserve_bo_and_vms(&ctx, false, false); | |
1333 | ||
1334 | /* Free the sync object */ | |
1335 | amdgpu_sync_free(&mem->sync); | |
1336 | ||
d8e408a8 OZ |
1337 | /* If the SG is not NULL, it's one we created for a doorbell or mmio |
1338 | * remap BO. We need to free it. | |
b408a548 FK |
1339 | */ |
1340 | if (mem->bo->tbo.sg) { | |
1341 | sg_free_table(mem->bo->tbo.sg); | |
1342 | kfree(mem->bo->tbo.sg); | |
1343 | } | |
1344 | ||
a46a2cd1 | 1345 | /* Free the BO */
39b3128d | 1346 | drm_gem_object_put_unlocked(&mem->bo->tbo.base); |
a46a2cd1 FK |
1347 | mutex_destroy(&mem->lock); |
1348 | kfree(mem); | |
1349 | ||
1350 | return ret; | |
1351 | } | |
1352 | ||
1353 | int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( | |
1354 | struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) | |
1355 | { | |
1356 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
5b21d3e5 | 1357 | struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; |
a46a2cd1 FK |
1358 | int ret; |
1359 | struct amdgpu_bo *bo; | |
1360 | uint32_t domain; | |
1361 | struct kfd_bo_va_list *entry; | |
1362 | struct bo_vm_reservation_context ctx; | |
1363 | struct kfd_bo_va_list *bo_va_entry = NULL; | |
1364 | struct kfd_bo_va_list *bo_va_entry_aql = NULL; | |
1365 | unsigned long bo_size; | |
5ae0283e | 1366 | bool is_invalid_userptr = false; |
a46a2cd1 FK |
1367 | |
1368 | bo = mem->bo; | |
a46a2cd1 FK |
1369 | if (!bo) { |
1370 | pr_err("Invalid BO when mapping memory to GPU\n"); | |
5ae0283e | 1371 | return -EINVAL; |
a46a2cd1 FK |
1372 | } |
1373 | ||
5ae0283e FK |
1374 | /* Make sure restore is not running concurrently. Since we |
1375 | * don't map invalid userptr BOs, we rely on the next restore | |
1376 | * worker to do the mapping | |
1377 | */ | |
1378 | mutex_lock(&mem->process_info->lock); | |
1379 | ||
1380 | /* Lock mmap-sem. If we find an invalid userptr BO, we can be | |
1381 | * sure that the MMU notifier is no longer running | |
1382 | * concurrently and the queues are actually stopped | |
1383 | */ | |
1384 | if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { | |
1385 | down_write(¤t->mm->mmap_sem); | |
1386 | is_invalid_userptr = atomic_read(&mem->invalid); | |
1387 | up_write(¤t->mm->mmap_sem); | |
1388 | } | |
1389 | ||
1390 | mutex_lock(&mem->lock); | |
1391 | ||
a46a2cd1 FK |
1392 | domain = mem->domain; |
1393 | bo_size = bo->tbo.mem.size; | |
1394 | ||
1395 | pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n", | |
1396 | mem->va, | |
1397 | mem->va + bo_size * (1 + mem->aql_queue), | |
1398 | vm, domain_string(domain)); | |
1399 | ||
1400 | ret = reserve_bo_and_vm(mem, vm, &ctx); | |
1401 | if (unlikely(ret)) | |
1402 | goto out; | |
1403 | ||
5ae0283e FK |
1404 | /* Userptr can be marked as "not invalid", but not actually be |
1405 | * validated yet (still in the system domain). In that case | |
1406 | * the queues are still stopped and we can leave mapping for | |
1407 | * the next restore worker | |
1408 | */ | |
0f04e538 FK |
1409 | if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && |
1410 | bo->tbo.mem.mem_type == TTM_PL_SYSTEM) | |
5ae0283e FK |
1411 | is_invalid_userptr = true; |
1412 | ||
5b21d3e5 FK |
1413 | if (check_if_add_bo_to_vm(avm, mem)) { |
1414 | ret = add_bo_to_vm(adev, mem, avm, false, | |
a46a2cd1 FK |
1415 | &bo_va_entry); |
1416 | if (ret) | |
1417 | goto add_bo_to_vm_failed; | |
1418 | if (mem->aql_queue) { | |
5b21d3e5 | 1419 | ret = add_bo_to_vm(adev, mem, avm, |
a46a2cd1 FK |
1420 | true, &bo_va_entry_aql); |
1421 | if (ret) | |
1422 | goto add_bo_to_vm_failed_aql; | |
1423 | } | |
1424 | } else { | |
5b21d3e5 | 1425 | ret = vm_validate_pt_pd_bos(avm); |
a46a2cd1 FK |
1426 | if (unlikely(ret)) |
1427 | goto add_bo_to_vm_failed; | |
1428 | } | |
1429 | ||
5ae0283e FK |
1430 | if (mem->mapped_to_gpu_memory == 0 && |
1431 | !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { | |
a46a2cd1 FK |
1432 | /* Validate BO only once. The eviction fence gets added to BO |
1433 | * the first time it is mapped. Validate will wait for all | |
1434 | * background evictions to complete. | |
1435 | */ | |
1436 | ret = amdgpu_amdkfd_bo_validate(bo, domain, true); | |
1437 | if (ret) { | |
1438 | pr_debug("Validate failed\n"); | |
1439 | goto map_bo_to_gpuvm_failed; | |
1440 | } | |
1441 | } | |
1442 | ||
1443 | list_for_each_entry(entry, &mem->bo_va_list, bo_list) { | |
1444 | if (entry->bo_va->base.vm == vm && !entry->is_mapped) { | |
1445 | pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n", | |
1446 | entry->va, entry->va + bo_size, | |
1447 | entry); | |
1448 | ||
5ae0283e FK |
1449 | ret = map_bo_to_gpuvm(adev, entry, ctx.sync, |
1450 | is_invalid_userptr); | |
a46a2cd1 | 1451 | if (ret) { |
0d87c9cf | 1452 | pr_err("Failed to map bo to gpuvm\n"); |
a46a2cd1 FK |
1453 | goto map_bo_to_gpuvm_failed; |
1454 | } | |
1455 | ||
1456 | ret = vm_update_pds(vm, ctx.sync); | |
1457 | if (ret) { | |
1458 | pr_err("Failed to update page directories\n"); | |
1459 | goto map_bo_to_gpuvm_failed; | |
1460 | } | |
1461 | ||
1462 | entry->is_mapped = true; | |
1463 | mem->mapped_to_gpu_memory++; | |
1464 | pr_debug("\t INC mapping count %d\n", | |
1465 | mem->mapped_to_gpu_memory); | |
1466 | } | |
1467 | } | |
1468 | ||
1469 | if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count) | |
1470 | amdgpu_bo_fence(bo, | |
5b21d3e5 | 1471 | &avm->process_info->eviction_fence->base, |
a46a2cd1 FK |
1472 | true); |
1473 | ret = unreserve_bo_and_vms(&ctx, false, false); | |
1474 | ||
1475 | goto out; | |
1476 | ||
1477 | map_bo_to_gpuvm_failed: | |
1478 | if (bo_va_entry_aql) | |
1479 | remove_bo_from_vm(adev, bo_va_entry_aql, bo_size); | |
1480 | add_bo_to_vm_failed_aql: | |
1481 | if (bo_va_entry) | |
1482 | remove_bo_from_vm(adev, bo_va_entry, bo_size); | |
1483 | add_bo_to_vm_failed: | |
1484 | unreserve_bo_and_vms(&ctx, false, false); | |
1485 | out: | |
1486 | mutex_unlock(&mem->process_info->lock); | |
1487 | mutex_unlock(&mem->lock); | |
1488 | return ret; | |
1489 | } | |
1490 | ||
1491 | int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( | |
1492 | struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) | |
1493 | { | |
1494 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | |
1495 | struct amdkfd_process_info *process_info = | |
5b21d3e5 | 1496 | ((struct amdgpu_vm *)vm)->process_info; |
a46a2cd1 FK |
1497 | unsigned long bo_size = mem->bo->tbo.mem.size; |
1498 | struct kfd_bo_va_list *entry; | |
1499 | struct bo_vm_reservation_context ctx; | |
1500 | int ret; | |
1501 | ||
1502 | mutex_lock(&mem->lock); | |
1503 | ||
1504 | ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx); | |
1505 | if (unlikely(ret)) | |
1506 | goto out; | |
1507 | /* If no VMs were reserved, it means the BO wasn't actually mapped */ | |
1508 | if (ctx.n_vms == 0) { | |
1509 | ret = -EINVAL; | |
1510 | goto unreserve_out; | |
1511 | } | |
1512 | ||
5b21d3e5 | 1513 | ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm); |
a46a2cd1 FK |
1514 | if (unlikely(ret)) |
1515 | goto unreserve_out; | |
1516 | ||
1517 | pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n", | |
1518 | mem->va, | |
1519 | mem->va + bo_size * (1 + mem->aql_queue), | |
1520 | vm); | |
1521 | ||
1522 | list_for_each_entry(entry, &mem->bo_va_list, bo_list) { | |
1523 | if (entry->bo_va->base.vm == vm && entry->is_mapped) { | |
1524 | pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n", | |
1525 | entry->va, | |
1526 | entry->va + bo_size, | |
1527 | entry); | |
1528 | ||
1529 | ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync); | |
1530 | if (ret == 0) { | |
1531 | entry->is_mapped = false; | |
1532 | } else { | |
1533 | pr_err("failed to unmap VA 0x%llx\n", | |
1534 | mem->va); | |
1535 | goto unreserve_out; | |
1536 | } | |
1537 | ||
1538 | mem->mapped_to_gpu_memory--; | |
1539 | pr_debug("\t DEC mapping count %d\n", | |
1540 | mem->mapped_to_gpu_memory); | |
1541 | } | |
1542 | } | |
1543 | ||
1544 | /* If BO is unmapped from all VMs, unfence it. It can be evicted if | |
1545 | * required. | |
1546 | */ | |
1547 | if (mem->mapped_to_gpu_memory == 0 && | |
1548 | !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count) | |
1549 | amdgpu_amdkfd_remove_eviction_fence(mem->bo, | |
2d086fde | 1550 | process_info->eviction_fence); |
a46a2cd1 FK |
1551 | |
1552 | unreserve_out: | |
1553 | unreserve_bo_and_vms(&ctx, false, false); | |
1554 | out: | |
1555 | mutex_unlock(&mem->lock); | |
1556 | return ret; | |
1557 | } | |
1558 | ||
1559 | int amdgpu_amdkfd_gpuvm_sync_memory( | |
1560 | struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) | |
1561 | { | |
1562 | struct amdgpu_sync sync; | |
1563 | int ret; | |
1564 | ||
1565 | amdgpu_sync_create(&sync); | |
1566 | ||
1567 | mutex_lock(&mem->lock); | |
1568 | amdgpu_sync_clone(&mem->sync, &sync); | |
1569 | mutex_unlock(&mem->lock); | |
1570 | ||
1571 | ret = amdgpu_sync_wait(&sync, intr); | |
1572 | amdgpu_sync_free(&sync); | |
1573 | return ret; | |
1574 | } | |
1575 | ||
1576 | int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, | |
1577 | struct kgd_mem *mem, void **kptr, uint64_t *size) | |
1578 | { | |
1579 | int ret; | |
1580 | struct amdgpu_bo *bo = mem->bo; | |
1581 | ||
1582 | if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { | |
1583 | pr_err("userptr can't be mapped to kernel\n"); | |
1584 | return -EINVAL; | |
1585 | } | |
1586 | ||
1587 | /* Delete kgd_mem from kfd_bo_list to avoid re-validating | |
1588 | * this BO when it is restored after an eviction. | |
1589 | */ | |
1590 | mutex_lock(&mem->process_info->lock); | |
1591 | ||
1592 | ret = amdgpu_bo_reserve(bo, true); | |
1593 | if (ret) { | |
1594 | pr_err("Failed to reserve bo. ret %d\n", ret); | |
1595 | goto bo_reserve_failed; | |
1596 | } | |
1597 | ||
7b7c6c81 | 1598 | ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); |
a46a2cd1 FK |
1599 | if (ret) { |
1600 | pr_err("Failed to pin bo. ret %d\n", ret); | |
1601 | goto pin_failed; | |
1602 | } | |
1603 | ||
1604 | ret = amdgpu_bo_kmap(bo, kptr); | |
1605 | if (ret) { | |
1606 | pr_err("Failed to map bo to kernel. ret %d\n", ret); | |
1607 | goto kmap_failed; | |
1608 | } | |
1609 | ||
1610 | amdgpu_amdkfd_remove_eviction_fence( | |
2d086fde | 1611 | bo, mem->process_info->eviction_fence); |
a46a2cd1 FK |
1612 | list_del_init(&mem->validate_list.head); |
1613 | ||
1614 | if (size) | |
1615 | *size = amdgpu_bo_size(bo); | |
1616 | ||
1617 | amdgpu_bo_unreserve(bo); | |
1618 | ||
1619 | mutex_unlock(&mem->process_info->lock); | |
1620 | return 0; | |
1621 | ||
1622 | kmap_failed: | |
1623 | amdgpu_bo_unpin(bo); | |
1624 | pin_failed: | |
1625 | amdgpu_bo_unreserve(bo); | |
1626 | bo_reserve_failed: | |
1627 | mutex_unlock(&mem->process_info->lock); | |
1628 | ||
1629 | return ret; | |
1630 | } | |
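/* Editorial sketch, not part of the driver: a minimal usage example of the
 * helper above. It assumes kgd and mem were obtained from earlier
 * amdgpu_amdkfd allocation calls; the wrapper name example_map_to_kernel
 * is hypothetical.
 */
static int example_map_to_kernel(struct kgd_dev *kgd, struct kgd_mem *mem)
{
	void *kptr;
	uint64_t size;
	int r;

	/* Pins the BO in GTT and returns a kernel CPU pointer to it */
	r = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kgd, mem, &kptr, &size);
	if (r)
		return r;

	/* CPU access through kptr is now valid until the BO is freed */
	memset(kptr, 0, size);
	return 0;
}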
1631 | ||
b97dfa27 | 1632 | int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, |
1633 | struct kfd_vm_fault_info *mem) | |
1634 | { | |
1635 | struct amdgpu_device *adev; | |
1636 | ||
1637 | adev = (struct amdgpu_device *)kgd; | |
1638 | if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { | |
1639 | *mem = *adev->gmc.vm_fault_info; | |
1640 | mb(); | |
1641 | atomic_set(&adev->gmc.vm_fault_info_updated, 0); | |
1642 | } | |
1643 | return 0; | |
1644 | } | |
1645 | ||
1dde0ea9 FK |
1646 | int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, |
1647 | struct dma_buf *dma_buf, | |
1648 | uint64_t va, void *vm, | |
1649 | struct kgd_mem **mem, uint64_t *size, | |
1650 | uint64_t *mmap_offset) | |
1651 | { | |
1652 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
1653 | struct drm_gem_object *obj; | |
1654 | struct amdgpu_bo *bo; | |
1655 | struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; | |
1656 | ||
1657 | if (dma_buf->ops != &amdgpu_dmabuf_ops) | |
1658 | /* Can't handle non-graphics buffers */ | |
1659 | return -EINVAL; | |
1660 | ||
1661 | obj = dma_buf->priv; | |
1662 | if (obj->dev->dev_private != adev) | |
1663 | /* Can't handle buffers from other devices */ | |
1664 | return -EINVAL; | |
1665 | ||
1666 | bo = gem_to_amdgpu_bo(obj); | |
1667 | if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | | |
1668 | AMDGPU_GEM_DOMAIN_GTT))) | |
1669 | /* Only VRAM and GTT BOs are supported */ | |
1670 | return -EINVAL; | |
1671 | ||
1672 | *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); | |
1673 | if (!*mem) | |
1674 | return -ENOMEM; | |
1675 | ||
1676 | if (size) | |
1677 | *size = amdgpu_bo_size(bo); | |
1678 | ||
1679 | if (mmap_offset) | |
1680 | *mmap_offset = amdgpu_bo_mmap_offset(bo); | |
1681 | ||
1682 | INIT_LIST_HEAD(&(*mem)->bo_va_list); | |
1683 | mutex_init(&(*mem)->lock); | |
1d251d90 | 1684 | |
d0ba51b1 FK |
1685 | (*mem)->alloc_flags = |
1686 | ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? | |
1d251d90 YZ |
1687 | KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT) |
1688 | | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | |
1689 | | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE; | |
1dde0ea9 | 1690 | |
39b3128d FK |
1691 | drm_gem_object_get(&bo->tbo.base); |
1692 | (*mem)->bo = bo; | |
1dde0ea9 FK |
1693 | (*mem)->va = va; |
1694 | (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? | |
1695 | AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; | |
1696 | (*mem)->mapped_to_gpu_memory = 0; | |
1697 | (*mem)->process_info = avm->process_info; | |
1698 | add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); | |
1699 | amdgpu_sync_create(&(*mem)->sync); | |
1700 | ||
1701 | return 0; | |
1702 | } | |
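/* Editorial sketch (hypothetical caller, not driver code): the expected
 * import flow resolves a userspace DMA-buf fd, calls the import helper
 * above, and drops the local dma_buf reference again. The function name
 * example_import_fd and the reduced error handling are assumptions.
 */
static int example_import_fd(struct kgd_dev *kgd, int fd, uint64_t va,
			     void *vm, struct kgd_mem **mem)
{
	struct dma_buf *dmabuf;
	uint64_t size, mmap_offset;
	int r;

	dmabuf = dma_buf_get(fd);
	if (IS_ERR(dmabuf))
		return PTR_ERR(dmabuf);

	r = amdgpu_amdkfd_gpuvm_import_dmabuf(kgd, dmabuf, va, vm, mem,
					      &size, &mmap_offset);
	dma_buf_put(dmabuf);	/* the import helper took its own GEM reference */
	return r;
}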
1703 | ||
5ae0283e FK |
1704 | /* Evict a userptr BO by stopping the queues if necessary |
1705 | * | |
1706 | * Runs in MMU notifier context, possibly under RECLAIM_FS. This means it | |
1707 | * cannot do any memory allocations, and cannot take any locks that | |
1708 | * are held elsewhere while allocating memory. Therefore this is as | |
1709 | * simple as possible, using atomic counters. | |
1710 | * | |
1711 | * It doesn't do anything to the BO itself. The real work happens in | |
1712 | * restore, where we get updated page addresses. This function only | |
1713 | * ensures that GPU access to the BO is stopped. | |
1714 | */ | |
e52482de FK |
1715 | int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, |
1716 | struct mm_struct *mm) | |
1717 | { | |
5ae0283e | 1718 | struct amdkfd_process_info *process_info = mem->process_info; |
9e089a29 | 1719 | int evicted_bos; |
5ae0283e FK |
1720 | int r = 0; |
1721 | ||
9e089a29 | 1722 | atomic_inc(&mem->invalid); |
5ae0283e FK |
1723 | evicted_bos = atomic_inc_return(&process_info->evicted_bos); |
1724 | if (evicted_bos == 1) { | |
1725 | /* First eviction, stop the queues */ | |
8e07e267 | 1726 | r = kgd2kfd_quiesce_mm(mm); |
5ae0283e FK |
1727 | if (r) |
1728 | pr_err("Failed to quiesce KFD\n"); | |
1729 | schedule_delayed_work(&process_info->restore_userptr_work, | |
1730 | msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); | |
1731 | } | |
1732 | ||
1733 | return r; | |
1734 | } | |
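/* Editorial sketch, assuming a hypothetical invalidation hook: this is how
 * an MMU-notifier path that has already looked up the kgd_mem for the
 * affected address range could use the helper above. Only the call into
 * amdgpu_amdkfd_evict_userptr() reflects the interface defined here.
 */
static void example_userptr_invalidate(struct kgd_mem *mem, struct mm_struct *mm)
{
	/* Stops the queues on the first eviction and schedules the delayed
	 * restore worker; must not allocate memory or take sleeping locks.
	 */
	if (amdgpu_amdkfd_evict_userptr(mem, mm))
		pr_debug("userptr eviction failed; queues may keep running\n");
}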
1735 | ||
1736 | /* Update invalid userptr BOs | |
1737 | * | |
1738 | * Moves invalidated (evicted) userptr BOs from userptr_valid_list to | |
1739 | * userptr_inval_list and updates user pages for all BOs that have | |
1740 | * been invalidated since their last update. | |
1741 | */ | |
1742 | static int update_invalid_user_pages(struct amdkfd_process_info *process_info, | |
1743 | struct mm_struct *mm) | |
1744 | { | |
1745 | struct kgd_mem *mem, *tmp_mem; | |
1746 | struct amdgpu_bo *bo; | |
1747 | struct ttm_operation_ctx ctx = { false, false }; | |
1748 | int invalid, ret; | |
1749 | ||
1750 | /* Move all invalidated BOs to the userptr_inval_list and | |
1751 | * release their user pages by migration to the CPU domain | |
1752 | */ | |
1753 | list_for_each_entry_safe(mem, tmp_mem, | |
1754 | &process_info->userptr_valid_list, | |
1755 | validate_list.head) { | |
1756 | if (!atomic_read(&mem->invalid)) | |
1757 | continue; /* BO is still valid */ | |
1758 | ||
1759 | bo = mem->bo; | |
1760 | ||
1761 | if (amdgpu_bo_reserve(bo, true)) | |
1762 | return -EAGAIN; | |
c704ab18 | 1763 | amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); |
5ae0283e FK |
1764 | ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); |
1765 | amdgpu_bo_unreserve(bo); | |
1766 | if (ret) { | |
1767 | pr_err("%s: Failed to invalidate userptr BO\n", | |
1768 | __func__); | |
1769 | return -EAGAIN; | |
1770 | } | |
1771 | ||
1772 | list_move_tail(&mem->validate_list.head, | |
1773 | &process_info->userptr_inval_list); | |
1774 | } | |
1775 | ||
1776 | if (list_empty(&process_info->userptr_inval_list)) | |
1777 | return 0; /* All evicted userptr BOs were freed */ | |
1778 | ||
1779 | /* Go through userptr_inval_list and update any invalid user_pages */ | |
1780 | list_for_each_entry(mem, &process_info->userptr_inval_list, | |
1781 | validate_list.head) { | |
1782 | invalid = atomic_read(&mem->invalid); | |
1783 | if (!invalid) | |
1784 | /* BO hasn't been invalidated since the last | |
1785 | * revalidation attempt. Keep its BO list. | |
1786 | */ | |
1787 | continue; | |
1788 | ||
1789 | bo = mem->bo; | |
1790 | ||
5ae0283e | 1791 | /* Get updated user pages */ |
e5eaa7cc | 1792 | ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); |
5ae0283e | 1793 | if (ret) { |
e82fdb16 | 1794 | pr_debug("%s: Failed to get user pages: %d\n", |
5ae0283e | 1795 | __func__, ret); |
e52482de | 1796 | |
e82fdb16 PY |
1797 | /* Return error -EBUSY or -ENOMEM, retry restore */ |
1798 | return ret; | |
1799 | } | |
6c55d6e9 | 1800 | |
81fa1af3 JG |
1801 | /* |
1802 | * FIXME: Cannot ignore the return code, must hold | |
1803 | * notifier_lock | |
1804 | */ | |
6c55d6e9 | 1805 | amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); |
f4fd28b6 FK |
1806 | |
1807 | /* Mark the BO as valid unless it was invalidated | |
1808 | * again concurrently. | |
1809 | */ | |
1810 | if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) | |
1811 | return -EAGAIN; | |
6c55d6e9 | 1812 | } |
e82fdb16 PY |
1813 | |
1814 | return 0; | |
6c55d6e9 PY |
1815 | } |
1816 | ||
5ae0283e FK |
1817 | /* Validate invalid userptr BOs |
1818 | * | |
1819 | * Validates BOs on the userptr_inval_list, and moves them back to the | |
1820 | * userptr_valid_list. Also updates GPUVM page tables with new page | |
1821 | * addresses and waits for the page table updates to complete. | |
1822 | */ | |
1823 | static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) | |
1824 | { | |
1825 | struct amdgpu_bo_list_entry *pd_bo_list_entries; | |
1826 | struct list_head resv_list, duplicates; | |
1827 | struct ww_acquire_ctx ticket; | |
1828 | struct amdgpu_sync sync; | |
1829 | ||
1830 | struct amdgpu_vm *peer_vm; | |
1831 | struct kgd_mem *mem, *tmp_mem; | |
1832 | struct amdgpu_bo *bo; | |
1833 | struct ttm_operation_ctx ctx = { false, false }; | |
1834 | int i, ret; | |
1835 | ||
1836 | pd_bo_list_entries = kcalloc(process_info->n_vms, | |
1837 | sizeof(struct amdgpu_bo_list_entry), | |
1838 | GFP_KERNEL); | |
1839 | if (!pd_bo_list_entries) { | |
1840 | pr_err("%s: Failed to allocate PD BO list entries\n", __func__); | |
899fbde1 PY |
1841 | ret = -ENOMEM; |
1842 | goto out_no_mem; | |
5ae0283e FK |
1843 | } |
1844 | ||
1845 | INIT_LIST_HEAD(&resv_list); | |
1846 | INIT_LIST_HEAD(&duplicates); | |
1847 | ||
1848 | /* Get all the page directory BOs that need to be reserved */ | |
1849 | i = 0; | |
1850 | list_for_each_entry(peer_vm, &process_info->vm_list_head, | |
1851 | vm_list_node) | |
1852 | amdgpu_vm_get_pd_bo(peer_vm, &resv_list, | |
1853 | &pd_bo_list_entries[i++]); | |
1854 | /* Add the userptr_inval_list entries to resv_list */ | |
1855 | list_for_each_entry(mem, &process_info->userptr_inval_list, | |
1856 | validate_list.head) { | |
1857 | list_add_tail(&mem->resv_list.head, &resv_list); | |
1858 | mem->resv_list.bo = mem->validate_list.bo; | |
a9f34c70 | 1859 | mem->resv_list.num_shared = mem->validate_list.num_shared; |
5ae0283e FK |
1860 | } |
1861 | ||
1862 | /* Reserve all BOs and page tables for validation */ | |
9165fb87 | 1863 | ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); |
5ae0283e FK |
1864 | WARN(!list_empty(&duplicates), "Duplicates should be empty"); |
1865 | if (ret) | |
899fbde1 | 1866 | goto out_free; |
5ae0283e FK |
1867 | |
1868 | amdgpu_sync_create(&sync); | |
1869 | ||
5ae0283e FK |
1870 | ret = process_validate_vms(process_info); |
1871 | if (ret) | |
1872 | goto unreserve_out; | |
1873 | ||
1874 | /* Validate BOs and update GPUVM page tables */ | |
1875 | list_for_each_entry_safe(mem, tmp_mem, | |
1876 | &process_info->userptr_inval_list, | |
1877 | validate_list.head) { | |
1878 | struct kfd_bo_va_list *bo_va_entry; | |
1879 | ||
1880 | bo = mem->bo; | |
1881 | ||
899fbde1 PY |
1882 | /* Validate the BO if we got user pages */ |
1883 | if (bo->tbo.ttm->pages[0]) { | |
c704ab18 | 1884 | amdgpu_bo_placement_from_domain(bo, mem->domain); |
5ae0283e FK |
1885 | ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); |
1886 | if (ret) { | |
1887 | pr_err("%s: failed to validate BO\n", __func__); | |
1888 | goto unreserve_out; | |
1889 | } | |
1890 | } | |
1891 | ||
5ae0283e FK |
1892 | list_move_tail(&mem->validate_list.head, |
1893 | &process_info->userptr_valid_list); | |
1894 | ||
1895 | /* Update mapping. If the BO was not validated | |
1896 | * (because we couldn't get user pages), this will | |
1897 | * clear the page table entries, which will result in | |
1898 | * VM faults if the GPU tries to access the invalid | |
1899 | * memory. | |
1900 | */ | |
1901 | list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) { | |
1902 | if (!bo_va_entry->is_mapped) | |
1903 | continue; | |
1904 | ||
1905 | ret = update_gpuvm_pte((struct amdgpu_device *) | |
1906 | bo_va_entry->kgd_dev, | |
1907 | bo_va_entry, &sync); | |
1908 | if (ret) { | |
1909 | pr_err("%s: update PTE failed\n", __func__); | |
1910 | /* make sure this gets validated again */ | |
1911 | atomic_inc(&mem->invalid); | |
1912 | goto unreserve_out; | |
1913 | } | |
1914 | } | |
1915 | } | |
1916 | ||
1917 | /* Update page directories */ | |
1918 | ret = process_update_pds(process_info, &sync); | |
1919 | ||
1920 | unreserve_out: | |
5ae0283e FK |
1921 | ttm_eu_backoff_reservation(&ticket, &resv_list); |
1922 | amdgpu_sync_wait(&sync, false); | |
1923 | amdgpu_sync_free(&sync); | |
899fbde1 | 1924 | out_free: |
5ae0283e | 1925 | kfree(pd_bo_list_entries); |
899fbde1 | 1926 | out_no_mem: |
5ae0283e FK |
1927 | |
1928 | return ret; | |
1929 | } | |
1930 | ||
1931 | /* Worker callback to restore evicted userptr BOs | |
1932 | * | |
1933 | * Tries to update and validate all userptr BOs. If successful and no | |
1934 | * concurrent evictions happened, the queues are restarted. Otherwise, | |
1935 | * reschedule for another attempt later. | |
1936 | */ | |
1937 | static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) | |
1938 | { | |
1939 | struct delayed_work *dwork = to_delayed_work(work); | |
1940 | struct amdkfd_process_info *process_info = | |
1941 | container_of(dwork, struct amdkfd_process_info, | |
1942 | restore_userptr_work); | |
1943 | struct task_struct *usertask; | |
1944 | struct mm_struct *mm; | |
1945 | int evicted_bos; | |
1946 | ||
1947 | evicted_bos = atomic_read(&process_info->evicted_bos); | |
1948 | if (!evicted_bos) | |
1949 | return; | |
1950 | ||
1951 | /* Reference task and mm in case of concurrent process termination */ | |
1952 | usertask = get_pid_task(process_info->pid, PIDTYPE_PID); | |
1953 | if (!usertask) | |
1954 | return; | |
1955 | mm = get_task_mm(usertask); | |
1956 | if (!mm) { | |
1957 | put_task_struct(usertask); | |
1958 | return; | |
1959 | } | |
1960 | ||
1961 | mutex_lock(&process_info->lock); | |
1962 | ||
1963 | if (update_invalid_user_pages(process_info, mm)) | |
1964 | goto unlock_out; | |
1965 | /* userptr_inval_list can be empty if all evicted userptr BOs | |
1966 | * have been freed. In that case there is nothing to validate | |
1967 | * and we can just restart the queues. | |
1968 | */ | |
1969 | if (!list_empty(&process_info->userptr_inval_list)) { | |
1970 | if (atomic_read(&process_info->evicted_bos) != evicted_bos) | |
1971 | goto unlock_out; /* Concurrent eviction, try again */ | |
1972 | ||
1973 | if (validate_invalid_user_pages(process_info)) | |
1974 | goto unlock_out; | |
1975 | } | |
1976 | /* Final check for concurrent eviction and atomic update. If | |
1977 | * another eviction happens after the successful update, it will | |
1978 | * be treated as a first eviction that calls quiesce_mm. The eviction | |
1979 | * reference counting inside KFD will handle this case. | |
1980 | */ | |
1981 | if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) != | |
1982 | evicted_bos) | |
1983 | goto unlock_out; | |
1984 | evicted_bos = 0; | |
8e07e267 | 1985 | if (kgd2kfd_resume_mm(mm)) { |
5ae0283e FK |
1986 | pr_err("%s: Failed to resume KFD\n", __func__); |
1987 | /* No recovery from this failure. Probably the CP is | |
1988 | * hanging. No point trying again. | |
1989 | */ | |
1990 | } | |
6c55d6e9 | 1991 | |
5ae0283e FK |
1992 | unlock_out: |
1993 | mutex_unlock(&process_info->lock); | |
1994 | mmput(mm); | |
1995 | put_task_struct(usertask); | |
1996 | ||
1997 | /* If validation failed, reschedule another attempt */ | |
1998 | if (evicted_bos) | |
1999 | schedule_delayed_work(&process_info->restore_userptr_work, | |
2000 | msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); | |
2001 | } | |
2002 | ||
a46a2cd1 FK |
2003 | /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given |
2004 | * KFD process identified by process_info | |
2005 | * | |
2006 | * @process_info: amdkfd_process_info of the KFD process | |
2007 | * | |
2008 | * After memory eviction, restore thread calls this function. The function | |
2009 | * should be called while the process is still valid. BO restore involves: | |
2010 | * | |
2011 | * 1. Release old eviction fence and create new one | |
2012 | * 2. Get two copies of PD BO list from all the VMs. Keep one copy as pd_list. | |
2013 | * 3. Use the second PD list and kfd_bo_list to create a list (ctx.list) of | |
2014 | * BOs that need to be reserved. | |
2015 | * 4. Reserve all the BOs | |
2016 | * 5. Validate PD and PT BOs. | |
2017 | * 6. Validate all KFD BOs using kfd_bo_list, map them and add the new fence | |
2018 | * 7. Add fence to all PD and PT BOs. | |
2019 | * 8. Unreserve all BOs | |
2020 | */ | |
2021 | int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) | |
2022 | { | |
2023 | struct amdgpu_bo_list_entry *pd_bo_list; | |
2024 | struct amdkfd_process_info *process_info = info; | |
5b21d3e5 | 2025 | struct amdgpu_vm *peer_vm; |
a46a2cd1 FK |
2026 | struct kgd_mem *mem; |
2027 | struct bo_vm_reservation_context ctx; | |
2028 | struct amdgpu_amdkfd_fence *new_fence; | |
2029 | int ret = 0, i; | |
2030 | struct list_head duplicate_save; | |
2031 | struct amdgpu_sync sync_obj; | |
2032 | ||
2033 | INIT_LIST_HEAD(&duplicate_save); | |
2034 | INIT_LIST_HEAD(&ctx.list); | |
2035 | INIT_LIST_HEAD(&ctx.duplicates); | |
2036 | ||
2037 | pd_bo_list = kcalloc(process_info->n_vms, | |
2038 | sizeof(struct amdgpu_bo_list_entry), | |
2039 | GFP_KERNEL); | |
2040 | if (!pd_bo_list) | |
2041 | return -ENOMEM; | |
2042 | ||
2043 | i = 0; | |
2044 | mutex_lock(&process_info->lock); | |
2045 | list_for_each_entry(peer_vm, &process_info->vm_list_head, | |
2046 | vm_list_node) | |
5b21d3e5 | 2047 | amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]); |
a46a2cd1 FK |
2048 | |
2049 | /* Reserve all BOs and page tables/directory. Add all BOs from | |
2050 | * kfd_bo_list to ctx.list | |
2051 | */ | |
2052 | list_for_each_entry(mem, &process_info->kfd_bo_list, | |
2053 | validate_list.head) { | |
2054 | ||
2055 | list_add_tail(&mem->resv_list.head, &ctx.list); | |
2056 | mem->resv_list.bo = mem->validate_list.bo; | |
a9f34c70 | 2057 | mem->resv_list.num_shared = mem->validate_list.num_shared; |
a46a2cd1 FK |
2058 | } |
2059 | ||
2060 | ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list, | |
9165fb87 | 2061 | false, &duplicate_save); |
a46a2cd1 FK |
2062 | if (ret) { |
2063 | pr_debug("Memory eviction: TTM Reserve Failed. Try again\n"); | |
2064 | goto ttm_reserve_fail; | |
2065 | } | |
2066 | ||
2067 | amdgpu_sync_create(&sync_obj); | |
2068 | ||
2069 | /* Validate PDs and PTs */ | |
2070 | ret = process_validate_vms(process_info); | |
2071 | if (ret) | |
2072 | goto validate_map_fail; | |
2073 | ||
9130cc01 HK |
2074 | ret = process_sync_pds_resv(process_info, &sync_obj); |
2075 | if (ret) { | |
2076 | pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n"); | |
2077 | goto validate_map_fail; | |
a46a2cd1 FK |
2078 | } |
2079 | ||
2080 | /* Validate BOs and map them to GPUVM (update VM page tables). */ | |
2081 | list_for_each_entry(mem, &process_info->kfd_bo_list, | |
2082 | validate_list.head) { | |
2083 | ||
2084 | struct amdgpu_bo *bo = mem->bo; | |
2085 | uint32_t domain = mem->domain; | |
2086 | struct kfd_bo_va_list *bo_va_entry; | |
2087 | ||
2088 | ret = amdgpu_amdkfd_bo_validate(bo, domain, false); | |
2089 | if (ret) { | |
2090 | pr_debug("Memory eviction: Validate BOs failed. Try again\n"); | |
2091 | goto validate_map_fail; | |
2092 | } | |
e095fc17 | 2093 | ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false); |
3d97da44 HK |
2094 | if (ret) { |
2095 | pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); | |
2096 | goto validate_map_fail; | |
2097 | } | |
a46a2cd1 FK |
2098 | list_for_each_entry(bo_va_entry, &mem->bo_va_list, |
2099 | bo_list) { | |
2100 | ret = update_gpuvm_pte((struct amdgpu_device *) | |
2101 | bo_va_entry->kgd_dev, | |
2102 | bo_va_entry, | |
2103 | &sync_obj); | |
2104 | if (ret) { | |
2105 | pr_debug("Memory eviction: update PTE failed. Try again\n"); | |
2106 | goto validate_map_fail; | |
2107 | } | |
2108 | } | |
2109 | } | |
2110 | ||
2111 | /* Update page directories */ | |
2112 | ret = process_update_pds(process_info, &sync_obj); | |
2113 | if (ret) { | |
2114 | pr_debug("Memory eviction: update PDs failed. Try again\n"); | |
2115 | goto validate_map_fail; | |
2116 | } | |
2117 | ||
3d97da44 | 2118 | /* Wait for validate and PT updates to finish */ |
a46a2cd1 FK |
2119 | amdgpu_sync_wait(&sync_obj, false); |
2120 | ||
2121 | /* Release the old eviction fence and create a new one, because a fence | |
2122 | * only goes from unsignaled to signaled and thus cannot be reused. | |
2123 | * Use the context and mm from the old fence. | |
2124 | */ | |
2125 | new_fence = amdgpu_amdkfd_fence_create( | |
2126 | process_info->eviction_fence->base.context, | |
2127 | process_info->eviction_fence->mm); | |
2128 | if (!new_fence) { | |
2129 | pr_err("Failed to create eviction fence\n"); | |
2130 | ret = -ENOMEM; | |
2131 | goto validate_map_fail; | |
2132 | } | |
2133 | dma_fence_put(&process_info->eviction_fence->base); | |
2134 | process_info->eviction_fence = new_fence; | |
2135 | *ef = dma_fence_get(&new_fence->base); | |
2136 | ||
3d97da44 | 2137 | /* Attach new eviction fence to all BOs */ |
a46a2cd1 FK |
2138 | list_for_each_entry(mem, &process_info->kfd_bo_list, |
2139 | validate_list.head) | |
2140 | amdgpu_bo_fence(mem->bo, | |
2141 | &process_info->eviction_fence->base, true); | |
2142 | ||
2143 | /* Attach eviction fence to PD / PT BOs */ | |
2144 | list_for_each_entry(peer_vm, &process_info->vm_list_head, | |
2145 | vm_list_node) { | |
5b21d3e5 | 2146 | struct amdgpu_bo *bo = peer_vm->root.base.bo; |
a46a2cd1 FK |
2147 | |
2148 | amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true); | |
2149 | } | |
2150 | ||
2151 | validate_map_fail: | |
2152 | ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list); | |
2153 | amdgpu_sync_free(&sync_obj); | |
2154 | ttm_reserve_fail: | |
2155 | mutex_unlock(&process_info->lock); | |
2156 | kfree(pd_bo_list); | |
2157 | return ret; | |
2158 | } | |
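/* Editorial sketch (hypothetical caller): once the eviction fence has been
 * signalled and the evicted BOs can be brought back, KFD's restore path
 * would call the function above with the process_info it received at
 * process creation and keep the returned fence for the next eviction cycle.
 */
static int example_restore(void *process_info, struct dma_fence **ef)
{
	int r = amdgpu_amdkfd_gpuvm_restore_process_bos(process_info, ef);

	if (r)
		pr_debug("restore failed (%d), caller may retry later\n", r);
	return r;
}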
71efab6a OZ |
2159 | |
2160 | int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem) | |
2161 | { | |
2162 | struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info; | |
2163 | struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws; | |
2164 | int ret; | |
2165 | ||
2166 | if (!info || !gws) | |
2167 | return -EINVAL; | |
2168 | ||
2169 | *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); | |
2170 | if (!*mem) | |
443e902e | 2171 | return -ENOMEM; |
71efab6a OZ |
2172 | |
2173 | mutex_init(&(*mem)->lock); | |
f583cc57 | 2174 | INIT_LIST_HEAD(&(*mem)->bo_va_list); |
71efab6a OZ |
2175 | (*mem)->bo = amdgpu_bo_ref(gws_bo); |
2176 | (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS; | |
2177 | (*mem)->process_info = process_info; | |
2178 | add_kgd_mem_to_kfd_bo_list(*mem, process_info, false); | |
2179 | amdgpu_sync_create(&(*mem)->sync); | |
2180 | ||
2181 | ||
2182 | /* Validate gws bo the first time it is added to process */ | |
2183 | mutex_lock(&(*mem)->process_info->lock); | |
2184 | ret = amdgpu_bo_reserve(gws_bo, false); | |
2185 | if (unlikely(ret)) { | |
2186 | pr_err("Reserve gws bo failed %d\n", ret); | |
2187 | goto bo_reservation_failure; | |
2188 | } | |
2189 | ||
2190 | ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true); | |
2191 | if (ret) { | |
2192 | pr_err("GWS BO validate failed %d\n", ret); | |
2193 | goto bo_validation_failure; | |
2194 | } | |
2195 | /* The GWS resource is shared between amdgpu and amdkfd. | |
2196 | * Add the process eviction fence to the BO so they can | |
2197 | * evict each other. | |
2198 | */ | |
52791eee | 2199 | ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1); |
96cf624b OZ |
2200 | if (ret) |
2201 | goto reserve_shared_fail; | |
71efab6a OZ |
2202 | amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); |
2203 | amdgpu_bo_unreserve(gws_bo); | |
2204 | mutex_unlock(&(*mem)->process_info->lock); | |
2205 | ||
2206 | return ret; | |
2207 | ||
96cf624b | 2208 | reserve_shared_fail: |
71efab6a OZ |
2209 | bo_validation_failure: |
2210 | amdgpu_bo_unreserve(gws_bo); | |
2211 | bo_reservation_failure: | |
2212 | mutex_unlock(&(*mem)->process_info->lock); | |
2213 | amdgpu_sync_free(&(*mem)->sync); | |
2214 | remove_kgd_mem_from_kfd_bo_list(*mem, process_info); | |
2215 | amdgpu_bo_unref(&gws_bo); | |
2216 | mutex_destroy(&(*mem)->lock); | |
2217 | kfree(*mem); | |
2218 | *mem = NULL; | |
2219 | return ret; | |
2220 | } | |
2221 | ||
2222 | int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem) | |
2223 | { | |
2224 | int ret; | |
2225 | struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info; | |
2226 | struct kgd_mem *kgd_mem = (struct kgd_mem *)mem; | |
2227 | struct amdgpu_bo *gws_bo = kgd_mem->bo; | |
2228 | ||
2229 | /* Remove BO from process's validate list so restore worker won't touch | |
2230 | * it anymore | |
2231 | */ | |
2232 | remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info); | |
2233 | ||
2234 | ret = amdgpu_bo_reserve(gws_bo, false); | |
2235 | if (unlikely(ret)) { | |
2236 | pr_err("Reserve gws bo failed %d\n", ret); | |
2237 | //TODO add BO back to validate_list? | |
2238 | return ret; | |
2239 | } | |
2240 | amdgpu_amdkfd_remove_eviction_fence(gws_bo, | |
2241 | process_info->eviction_fence); | |
2242 | amdgpu_bo_unreserve(gws_bo); | |
2243 | amdgpu_sync_free(&kgd_mem->sync); | |
2244 | amdgpu_bo_unref(&gws_bo); | |
2245 | mutex_destroy(&kgd_mem->lock); | |
2246 | kfree(mem); | |
2247 | return 0; | |
2248 | } | |
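/* Editorial sketch pairing the two GWS helpers above; the wrapper name and
 * the surrounding lifetime management are hypothetical. gws is the
 * device-global GWS BO, process_info the per-process KFD info.
 */
static int example_gws_lifetime(void *process_info, void *gws)
{
	struct kgd_mem *gws_mem;
	int r;

	r = amdgpu_amdkfd_add_gws_to_process(process_info, gws, &gws_mem);
	if (r)
		return r;

	/* ... user queues may now use GWS ... */

	return amdgpu_amdkfd_remove_gws_from_process(process_info, gws_mem);
}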
fd7d08ba YZ |
2249 | |
2250 | /* Returns GPU-specific tiling mode information */ | |
2251 | int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, | |
2252 | struct tile_config *config) | |
2253 | { | |
2254 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
2255 | ||
2256 | config->gb_addr_config = adev->gfx.config.gb_addr_config; | |
2257 | config->tile_config_ptr = adev->gfx.config.tile_mode_array; | |
2258 | config->num_tile_configs = | |
2259 | ARRAY_SIZE(adev->gfx.config.tile_mode_array); | |
2260 | config->macro_tile_config_ptr = | |
2261 | adev->gfx.config.macrotile_mode_array; | |
2262 | config->num_macro_tile_configs = | |
2263 | ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); | |
2264 | ||
2265 | /* Those values are not set from GFX9 onwards */ | |
2266 | config->num_banks = adev->gfx.config.num_banks; | |
2267 | config->num_ranks = adev->gfx.config.num_ranks; | |
2268 | ||
2269 | return 0; | |
2270 | } |