// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_vm.h"
/*
 * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
 */
struct amdgpu_vm_pt_cursor {
	uint64_t pfn;
	struct amdgpu_vm_bo_base *parent;
	struct amdgpu_vm_bo_base *entry;
	unsigned int level;
};
/**
 * amdgpu_vm_pt_level_shift - return the addr shift for each level
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of bits the pfn needs to be right shifted for a level.
 */
static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev,
					     unsigned int level)
{
	switch (level) {
	case AMDGPU_VM_PDB2:
	case AMDGPU_VM_PDB1:
	case AMDGPU_VM_PDB0:
		return 9 * (AMDGPU_VM_PDB0 - level) +
			adev->vm_manager.block_size;
	case AMDGPU_VM_PTB:
		return 0;
	default:
		return ~0;
	}
}
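
/*
 * Worked example (illustrative sketch, not part of the driver): assuming the
 * common four-level Vega10+ configuration with root_level == AMDGPU_VM_PDB2
 * and block_size == 9, the shifts come out as
 *
 *	AMDGPU_VM_PDB2: 9 * (2 - 0) + 9 = 27
 *	AMDGPU_VM_PDB1: 9 * (2 - 1) + 9 = 18
 *	AMDGPU_VM_PDB0: 9 * (2 - 2) + 9 =  9
 *	AMDGPU_VM_PTB:  0
 *
 * so a pfn is shifted right by 27 bits to select its root PD entry.
 */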
/**
 * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of entries in a page directory or page table.
 */
static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
					     unsigned int level)
{
	unsigned int shift;

	shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
	if (level == adev->vm_manager.root_level)
		/* For the root directory */
		return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
			>> shift;
	else if (level != AMDGPU_VM_PTB)
		/* Everything in between */
		return 512;
	else
		/* For the page tables on the leaves */
		return AMDGPU_VM_PTE_COUNT(adev);
}
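
/*
 * Example (a sketch under assumed numbers): with a 16 TiB address space,
 * max_pfn = 16 TiB / 4 KiB = 1ULL << 32. Using the root shift of 27 from the
 * example above, the root PD holds
 *
 *	round_up(1ULL << 32, 1ULL << 27) >> 27 = 1 << 5 = 32 entries,
 *
 * while the intermediate PDB levels always hold 512 entries each.
 */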
/**
 * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD
 *
 * @adev: amdgpu_device pointer
 *
 * Returns:
 * The number of entries in the root page directory which needs the ATS setting.
 */
static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev)
{
	unsigned int shift;

	shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
	return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
}
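
/*
 * Example (assumptions: AMDGPU_GMC_HOLE_START == 1ULL << 47 and the root
 * shift of 27 from above): 47 - (27 + 12) = 8, so the first 1 << 8 = 256
 * root PD entries cover the ATS aperture below the GMC hole.
 */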
/**
 * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The mask to extract the entry number of a PD/PT from an address.
 */
static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev,
					  unsigned int level)
{
	if (level <= adev->vm_manager.root_level)
		return 0xffffffff;
	else if (level != AMDGPU_VM_PTB)
		return 0x1ff;
	else
		return AMDGPU_VM_PTE_COUNT(adev) - 1;
}
/**
 * amdgpu_vm_pt_size - returns the size of the page table in bytes
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The size of the BO for a page directory or page table in bytes.
 */
static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev,
				      unsigned int level)
{
	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8);
}
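
/*
 * Example: assuming a PTB with AMDGPU_VM_PTE_COUNT(adev) == 512 entries,
 * the BO needs 512 * 8 = 4096 bytes, which AMDGPU_GPU_PAGE_ALIGN leaves at
 * exactly one 4 KiB GPU page.
 */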
/**
 * amdgpu_vm_pt_parent - get the parent page directory
 *
 * @pt: child page table
 *
 * Helper to get the parent entry for the child page table. NULL if we are at
 * the root page directory.
 */
static struct amdgpu_vm_bo_base *
amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
{
	struct amdgpu_bo *parent = pt->bo->parent;

	if (!parent)
		return NULL;

	return parent->vm_bo;
}
/**
 * amdgpu_vm_pt_start - start PD/PT walk
 *
 * @adev: amdgpu_device pointer
 * @vm: amdgpu_vm structure
 * @start: start address of the walk
 * @cursor: state to initialize
 *
 * Initialize a amdgpu_vm_pt_cursor to start a walk.
 */
static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
			       struct amdgpu_vm *vm, uint64_t start,
			       struct amdgpu_vm_pt_cursor *cursor)
{
	cursor->pfn = start;
	cursor->parent = NULL;
	cursor->entry = &vm->root;
	cursor->level = adev->vm_manager.root_level;
}
/**
 * amdgpu_vm_pt_descendant - go to child node
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk to the child node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
				    struct amdgpu_vm_pt_cursor *cursor)
{
	unsigned int mask, shift, idx;

	if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry ||
	    !cursor->entry->bo)
		return false;

	mask = amdgpu_vm_pt_entries_mask(adev, cursor->level);
	shift = amdgpu_vm_pt_level_shift(adev, cursor->level);

	++cursor->level;
	idx = (cursor->pfn >> shift) & mask;
	cursor->parent = cursor->entry;
	cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx];
	return true;
}
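
/*
 * Example (hypothetical numbers): with the cursor on a PDB0 directory
 * (shift 9, mask 0x1ff) and cursor->pfn == 0x12345, the child entry is
 *
 *	idx = (0x12345 >> 9) & 0x1ff = 0x91,
 *
 * i.e. entry 145 of the directory the cursor currently points at.
 */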
/**
 * amdgpu_vm_pt_sibling - go to sibling node
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk to the sibling node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
				 struct amdgpu_vm_pt_cursor *cursor)
{
	unsigned int shift, num_entries;
	struct amdgpu_bo_vm *parent;

	/* Root doesn't have a sibling */
	if (!cursor->parent)
		return false;

	/* Go to our parents and see if we got a sibling */
	shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1);
	num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1);
	parent = to_amdgpu_bo_vm(cursor->parent->bo);

	if (cursor->entry == &parent->entries[num_entries - 1])
		return false;

	cursor->pfn += 1ULL << shift;
	cursor->pfn &= ~((1ULL << shift) - 1);
	++cursor->entry;
	return true;
}
/**
 * amdgpu_vm_pt_ancestor - go to parent node
 *
 * @cursor: current state
 *
 * Walk to the parent node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
{
	if (!cursor->parent)
		return false;

	--cursor->level;
	cursor->entry = cursor->parent;
	cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
	return true;
}
/**
 * amdgpu_vm_pt_next - get next PD/PT in hierarchy
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk the PD/PT tree to the next node.
 */
static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
			      struct amdgpu_vm_pt_cursor *cursor)
{
	/* First try a newborn child */
	if (amdgpu_vm_pt_descendant(adev, cursor))
		return;

	/* If that didn't work try to find a sibling */
	while (!amdgpu_vm_pt_sibling(adev, cursor)) {
		/* No sibling, go to our parents and grandparents */
		if (!amdgpu_vm_pt_ancestor(cursor)) {
			cursor->pfn = ~0ll;
			return;
		}
	}
}
/**
 * amdgpu_vm_pt_first_dfs - start a depth-first search
 *
 * @adev: amdgpu_device structure
 * @vm: amdgpu_vm structure
 * @start: optional cursor to start with
 * @cursor: state to initialize
 *
 * Starts a depth-first traversal of the PD/PT tree.
 */
static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
				   struct amdgpu_vm *vm,
				   struct amdgpu_vm_pt_cursor *start,
				   struct amdgpu_vm_pt_cursor *cursor)
{
	if (start)
		*cursor = *start;
	else
		amdgpu_vm_pt_start(adev, vm, 0, cursor);

	while (amdgpu_vm_pt_descendant(adev, cursor))
		;
}
/**
 * amdgpu_vm_pt_continue_dfs - check if the depth-first search should continue
 *
 * @start: starting point for the search
 * @entry: current entry
 *
 * Returns:
 * True when the search should continue, false otherwise.
 */
static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
				      struct amdgpu_vm_bo_base *entry)
{
	return entry && (!start || entry != start->entry);
}
/**
 * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
 *
 * @adev: amdgpu_device structure
 * @cursor: current state
 *
 * Move the cursor to the next node in a depth-first search.
 */
static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
				  struct amdgpu_vm_pt_cursor *cursor)
{
	if (!cursor->entry)
		return;

	if (!cursor->parent)
		cursor->entry = NULL;
	else if (amdgpu_vm_pt_sibling(adev, cursor))
		while (amdgpu_vm_pt_descendant(adev, cursor))
			;
	else
		amdgpu_vm_pt_ancestor(cursor);
}
/*
 * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
 */
#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)	\
	for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)),		\
	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
	     amdgpu_vm_pt_continue_dfs((start), (entry));			\
	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
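
/*
 * Usage sketch (illustrative only, not from this file): counting all
 * currently allocated PDs/PTs of a VM with the iterator above. Assumes the
 * VM's page-table reservations are held, as the callers in this file ensure.
 *
 *	struct amdgpu_vm_pt_cursor cursor;
 *	struct amdgpu_vm_bo_base *entry;
 *	unsigned int count = 0;
 *
 *	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
 *		if (entry->bo)
 *			++count;
 *
 * The "safe" part means the current entry may be freed inside the loop body,
 * because the cursor has already advanced past it.
 */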
/**
 * amdgpu_vm_pt_clear - initially clear the PDs/PTs
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to clear BO from
 * @vmbo: BO to clear
 * @immediate: use an immediate update
 *
 * Root PD needs to be reserved when calling this.
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		       struct amdgpu_bo_vm *vmbo, bool immediate)
{
	unsigned int level = adev->vm_manager.root_level;
	struct ttm_operation_ctx ctx = { true, false };
	struct amdgpu_vm_update_params params;
	struct amdgpu_bo *ancestor = &vmbo->bo;
	unsigned int entries, ats_entries;
	struct amdgpu_bo *bo = &vmbo->bo;
	uint64_t addr;
	int r, idx;

	/* Figure out our place in the hierarchy */
	if (ancestor->parent) {
		++level;
		while (ancestor->parent->parent) {
			++level;
			ancestor = ancestor->parent;
		}
	}

	entries = amdgpu_bo_size(bo) / 8;
	if (!vm->pte_support_ats) {
		ats_entries = 0;
	} else if (!bo->parent) {
		ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
		ats_entries = min(ats_entries, entries);
		entries -= ats_entries;
	} else {
		struct amdgpu_vm_bo_base *pt;

		pt = ancestor->vm_bo;
		ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
		if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >=
		    ats_entries) {
			ats_entries = 0;
		} else {
			ats_entries = entries;
			entries = 0;
		}
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (r)
		return r;

	if (vmbo->shadow) {
		struct amdgpu_bo *shadow = vmbo->shadow;

		r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
		if (r)
			return r;
	}

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return -ENODEV;

	r = vm->update_funcs->map_table(vmbo);
	if (r)
		goto exit;

	memset(&params, 0, sizeof(params));
	params.adev = adev;
	params.vm = vm;
	params.immediate = immediate;

	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
	if (r)
		goto exit;

	addr = 0;
	if (ats_entries) {
		uint64_t value = 0, flags;

		flags = AMDGPU_PTE_DEFAULT_ATC;
		if (level != AMDGPU_VM_PTB) {
			/* Handle leaf PDEs as PTEs */
			flags |= AMDGPU_PDE_PTE;
			amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
		}

		r = vm->update_funcs->update(&params, vmbo, addr, 0,
					     ats_entries, value, flags);
		if (r)
			goto exit;

		addr += ats_entries * 8;
	}

	if (entries) {
		uint64_t value = 0, flags = 0;

		if (adev->asic_type >= CHIP_VEGA10) {
			if (level != AMDGPU_VM_PTB) {
				/* Handle leaf PDEs as PTEs */
				flags |= AMDGPU_PDE_PTE;
				amdgpu_gmc_get_vm_pde(adev, level,
						      &value, &flags);
			} else {
				/* Workaround for fault priority problem on GMC9 */
				flags = AMDGPU_PTE_EXECUTABLE;
			}
		}

		r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
					     value, flags);
		if (r)
			goto exit;
	}

	r = vm->update_funcs->commit(&params, NULL);
exit:
	drm_dev_exit(idx);
	return r;
}
/**
 * amdgpu_vm_pt_create - create bo for PD/PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requesting vm
 * @level: the page table level
 * @immediate: use an immediate update
 * @vmbo: pointer to the buffer object pointer
 * @xcp_id: GPU partition id
 */
int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			int level, bool immediate, struct amdgpu_bo_vm **vmbo,
			int32_t xcp_id)
{
	struct amdgpu_bo_param bp;
	struct amdgpu_bo *bo;
	struct dma_resv *resv;
	unsigned int num_entries;
	int r;

	memset(&bp, 0, sizeof(bp));

	bp.size = amdgpu_vm_pt_size(adev, level);
	bp.byte_align = AMDGPU_GPU_PAGE_SIZE;

	if (!adev->gmc.is_app_apu)
		bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	else
		bp.domain = AMDGPU_GEM_DOMAIN_GTT;

	bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
		AMDGPU_GEM_CREATE_CPU_GTT_USWC;

	if (level < AMDGPU_VM_PTB)
		num_entries = amdgpu_vm_pt_num_entries(adev, level);
	else
		num_entries = 0;

	bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);

	if (vm->use_cpu_for_update)
		bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

	bp.type = ttm_bo_type_kernel;
	bp.no_wait_gpu = immediate;
	bp.xcp_id_plus1 = xcp_id + 1;

	if (vm->root.bo)
		bp.resv = vm->root.bo->tbo.base.resv;

	r = amdgpu_bo_create_vm(adev, &bp, vmbo);
	if (r)
		return r;

	bo = &(*vmbo)->bo;
	if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
		(*vmbo)->shadow = NULL;
		return 0;
	}

	if (!bp.resv)
		WARN_ON(dma_resv_lock(bo->tbo.base.resv,
				      NULL));
	resv = bp.resv;
	memset(&bp, 0, sizeof(bp));
	bp.size = amdgpu_vm_pt_size(adev, level);
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = bo->tbo.base.resv;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
	bp.xcp_id_plus1 = xcp_id + 1;

	r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);

	if (!resv)
		dma_resv_unlock(bo->tbo.base.resv);

	if (r) {
		amdgpu_bo_unref(&bo);
		return r;
	}

	(*vmbo)->shadow->parent = amdgpu_bo_ref(bo);
	amdgpu_bo_add_to_shadow_list(*vmbo);

	return 0;
}
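
/*
 * Usage sketch (hypothetical call, not from this file): allocating a leaf
 * page table on partition 0 with a deferred (non-immediate) update would
 * look roughly like
 *
 *	struct amdgpu_bo_vm *vmbo;
 *	int r = amdgpu_vm_pt_create(adev, vm, AMDGPU_VM_PTB, false, &vmbo, 0);
 *
 * amdgpu_vm_pt_alloc() below is the canonical caller and also wires the new
 * BO into the page-table hierarchy.
 */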
/**
 * amdgpu_vm_pt_alloc - Allocate a specific page table
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to allocate page tables for
 * @cursor: Which page table to allocate
 * @immediate: use an immediate update
 *
 * Make sure a specific page table or directory is allocated.
 *
 * Returns:
 * 1 if page table needed to be allocated, 0 if page table was already
 * allocated, negative errno if an error occurred.
 */
static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
			      struct amdgpu_vm *vm,
			      struct amdgpu_vm_pt_cursor *cursor,
			      bool immediate)
{
	struct amdgpu_vm_bo_base *entry = cursor->entry;
	struct amdgpu_bo *pt_bo;
	struct amdgpu_bo_vm *pt;
	int r;

	if (entry->bo)
		return 0;

	amdgpu_vm_eviction_unlock(vm);
	r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt,
				vm->root.bo->xcp_id);
	amdgpu_vm_eviction_lock(vm);
	if (r)
		return r;

	/* Keep a reference to the root directory to avoid
	 * freeing them up in the wrong order.
	 */
	pt_bo = &pt->bo;
	pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
	amdgpu_vm_bo_base_init(entry, vm, pt_bo);
	r = amdgpu_vm_pt_clear(adev, vm, pt, immediate);
	if (r)
		goto error_free_pt;

	return 0;

error_free_pt:
	amdgpu_bo_unref(&pt->shadow);
	amdgpu_bo_unref(&pt_bo);
	return r;
}
/**
 * amdgpu_vm_pt_free - free one PD/PT
 *
 * @entry: PDE to free
 */
static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
{
	struct amdgpu_bo *shadow;

	if (!entry->bo)
		return;

	entry->bo->vm_bo = NULL;
	shadow = amdgpu_bo_shadowed(entry->bo);
	if (shadow) {
		ttm_bo_set_bulk_move(&shadow->tbo, NULL);
		amdgpu_bo_unref(&shadow);
	}
	ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);

	spin_lock(&entry->vm->status_lock);
	list_del(&entry->vm_status);
	spin_unlock(&entry->vm->status_lock);
	amdgpu_bo_unref(&entry->bo);
}
void amdgpu_vm_pt_free_work(struct work_struct *work)
{
	struct amdgpu_vm_bo_base *entry, *next;
	struct amdgpu_vm *vm;
	LIST_HEAD(pt_freed);

	vm = container_of(work, struct amdgpu_vm, pt_free_work);

	spin_lock(&vm->status_lock);
	list_splice_init(&vm->pt_freed, &pt_freed);
	spin_unlock(&vm->status_lock);

	/* flush_work in amdgpu_vm_fini ensures vm->root.bo is valid. */
	amdgpu_bo_reserve(vm->root.bo, true);

	list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
		amdgpu_vm_pt_free(entry);

	amdgpu_bo_unreserve(vm->root.bo);
}
/**
 * amdgpu_vm_pt_free_dfs - free PD/PT levels
 *
 * @adev: amdgpu device structure
 * @vm: amdgpu vm structure
 * @start: optional cursor where to start freeing PDs/PTs
 * @unlocked: vm resv unlock status
 *
 * Free the page directory or page table level and all sub levels.
 */
static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
				  struct amdgpu_vm *vm,
				  struct amdgpu_vm_pt_cursor *start,
				  bool unlocked)
{
	struct amdgpu_vm_pt_cursor cursor;
	struct amdgpu_vm_bo_base *entry;

	if (unlocked) {
		spin_lock(&vm->status_lock);
		for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
			list_move(&entry->vm_status, &vm->pt_freed);

		if (start)
			list_move(&start->entry->vm_status, &vm->pt_freed);
		spin_unlock(&vm->status_lock);
		schedule_work(&vm->pt_free_work);
		return;
	}

	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
		amdgpu_vm_pt_free(entry);

	if (start)
		amdgpu_vm_pt_free(start->entry);
}
/**
 * amdgpu_vm_pt_free_root - free root PD
 * @adev: amdgpu device structure
 * @vm: amdgpu vm structure
 *
 * Free the root page directory and everything below it.
 */
void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
}
/**
 * amdgpu_vm_pt_is_root_clean - check if a root PD is clean
 *
 * @adev: amdgpu_device pointer
 * @vm: the VM to check
 *
 * Check all entries of the root PD. If any subsequent PDs are allocated,
 * it means page tables are being created and filled, and the root PD is
 * not clean.
 *
 * Returns:
 *	True if this VM is clean
 */
bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
				struct amdgpu_vm *vm)
{
	enum amdgpu_vm_level root = adev->vm_manager.root_level;
	unsigned int entries = amdgpu_vm_pt_num_entries(adev, root);
	unsigned int i = 0;

	for (i = 0; i < entries; i++) {
		if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo)
			return false;
	}
	return true;
}
/**
 * amdgpu_vm_pde_update - update a single level in the hierarchy
 *
 * @params: parameters for the update
 * @entry: entry to update
 *
 * Makes sure the requested entry in parent is up to date.
 */
int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
			 struct amdgpu_vm_bo_base *entry)
{
	struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
	struct amdgpu_bo *bo = parent->bo, *pbo;
	struct amdgpu_vm *vm = params->vm;
	uint64_t pde, pt, flags;
	unsigned int level;

	for (level = 0, pbo = bo->parent; pbo; ++level)
		pbo = pbo->parent;

	level += params->adev->vm_manager.root_level;
	amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
	pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
	return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
					1, 0, flags);
}
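
/*
 * Example: the PDE byte offset is simply the entry index scaled by the
 * 8-byte descriptor size. If @entry is slot 3 of its parent,
 *
 *	pde = (entry - parent_entries) * 8 = 3 * 8 = 24,
 *
 * so the single-entry update above touches bytes 24..31 of the parent PD.
 */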
/**
 * amdgpu_vm_pte_update_noretry_flags - Update PTE no-retry flags
 *
 * @adev: amdgpu_device pointer
 * @flags: pointer to PTE flags
 *
 * Update PTE no-retry flags when TF is enabled.
 */
static void amdgpu_vm_pte_update_noretry_flags(struct amdgpu_device *adev,
					       uint64_t *flags)
{
	/*
	 * Update no-retry flags with the corresponding TF
	 * no-retry combination.
	 */
	if ((*flags & AMDGPU_VM_NORETRY_FLAGS) == AMDGPU_VM_NORETRY_FLAGS) {
		*flags &= ~AMDGPU_VM_NORETRY_FLAGS;
		*flags |= adev->gmc.noretry_flags;
	}
}
/*
 * amdgpu_vm_pte_update_flags - figure out flags for PTE updates
 *
 * Make sure to set the right flags for the PTEs at the desired level.
 */
static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
				       struct amdgpu_bo_vm *pt,
				       unsigned int level,
				       uint64_t pe, uint64_t addr,
				       unsigned int count, uint32_t incr,
				       uint64_t flags)
{
	struct amdgpu_device *adev = params->adev;

	if (level != AMDGPU_VM_PTB) {
		flags |= AMDGPU_PDE_PTE;
		amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);

	} else if (adev->asic_type >= CHIP_VEGA10 &&
		   !(flags & AMDGPU_PTE_VALID) &&
		   !(flags & AMDGPU_PTE_PRT)) {

		/* Workaround for fault priority problem on GMC9 */
		flags |= AMDGPU_PTE_EXECUTABLE;
	}

	/*
	 * Update no-retry flags to use the no-retry flag combination
	 * with TF enabled. The AMDGPU_VM_NORETRY_FLAGS flag combination
	 * does not work when TF is enabled. So, replace them with
	 * AMDGPU_VM_NORETRY_FLAGS_TF flag combination which works for
	 * all cases.
	 */
	if (level == AMDGPU_VM_PTB)
		amdgpu_vm_pte_update_noretry_flags(adev, &flags);

	/* APUs mapping system memory may need different MTYPEs on different
	 * NUMA nodes. Only do this for contiguous ranges that can be assumed
	 * to be on the same NUMA node.
	 */
	if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
	    adev->gmc.gmc_funcs->override_vm_pte_flags &&
	    num_possible_nodes() > 1 && !params->pages_addr && params->allow_override)
		amdgpu_gmc_override_vm_pte_flags(adev, params->vm, addr, &flags);

	params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
					 flags);
}
/**
 * amdgpu_vm_pte_fragment - get fragment for PTEs
 *
 * @params: see amdgpu_vm_update_params definition
 * @start: first PTE to handle
 * @end: last PTE to handle
 * @flags: hw mapping flags
 * @frag: resulting fragment size
 * @frag_end: end of this fragment
 *
 * Returns the first possible fragment for the start and end address.
 */
static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params,
				   uint64_t start, uint64_t end, uint64_t flags,
				   unsigned int *frag, uint64_t *frag_end)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 *
	 * Starting with Vega10 the fragment size only controls the L1. The L2
	 * is now directly fed with small/huge/giant pages from the walker.
	 */
	unsigned int max_frag;

	if (params->adev->asic_type < CHIP_VEGA10)
		max_frag = params->adev->vm_manager.fragment_size;
	else
		max_frag = 31;

	/* system pages are not contiguous */
	if (params->pages_addr) {
		*frag = 0;
		*frag_end = end;
		return;
	}

	/* This intentionally wraps around if no bit is set */
	*frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1);
	if (*frag >= max_frag) {
		*frag = max_frag;
		*frag_end = end & ~((1ULL << max_frag) - 1);
	} else {
		*frag_end = start + (1 << *frag);
	}
}
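
/*
 * Worked example (assumed values): start = 0x400, end = 0x800 on a Vega10+
 * part (max_frag == 31). ffs(0x400) - 1 == 10 and fls64(0x400) - 1 == 10,
 * so *frag = 10 and *frag_end = 0x400 + (1 << 10) = 0x800: the whole range
 * becomes a single fragment covering 1 << (12 + 10) bytes, i.e. 4 MiB.
 */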
/**
 * amdgpu_vm_ptes_update - make sure that page tables are valid
 *
 * @params: see amdgpu_vm_update_params definition
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to, the next dst inside the function
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end.
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
			  uint64_t start, uint64_t end,
			  uint64_t dst, uint64_t flags)
{
	struct amdgpu_device *adev = params->adev;
	struct amdgpu_vm_pt_cursor cursor;
	uint64_t frag_start = start, frag_end;
	unsigned int frag;
	int r;

	/* figure out the initial fragment */
	amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag,
			       &frag_end);

	/* walk over the address space and update the PTs */
	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
	while (cursor.pfn < end) {
		unsigned int shift, parent_shift, mask;
		uint64_t incr, entry_end, pe_start;
		struct amdgpu_bo *pt;

		if (!params->unlocked) {
			/* make sure that the page tables covering the
			 * address range are actually allocated
			 */
			r = amdgpu_vm_pt_alloc(params->adev, params->vm,
					       &cursor, params->immediate);
			if (r)
				return r;
		}

		shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
		parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
		if (params->unlocked) {
			/* Unlocked updates are only allowed on the leaves */
			if (amdgpu_vm_pt_descendant(adev, &cursor))
				continue;
		} else if (adev->asic_type < CHIP_VEGA10 &&
			   (flags & AMDGPU_PTE_VALID)) {
			/* No huge page support before GMC v9 */
			if (cursor.level != AMDGPU_VM_PTB) {
				if (!amdgpu_vm_pt_descendant(adev, &cursor))
					return -ENOENT;
				continue;
			}
		} else if (frag < shift) {
			/* We can't use this level when the fragment size is
			 * smaller than the address shift. Go to the next
			 * child entry and try again.
			 */
			if (amdgpu_vm_pt_descendant(adev, &cursor))
				continue;
		} else if (frag >= parent_shift) {
			/* If the fragment size is even larger than the parent
			 * shift we should go up one level and check it again.
			 */
			if (!amdgpu_vm_pt_ancestor(&cursor))
				return -EINVAL;
			continue;
		}

		pt = cursor.entry->bo;
		if (!pt) {
			/* We need all PDs and PTs for mapping something, */
			if (flags & AMDGPU_PTE_VALID)
				return -ENOENT;

			/* but unmapping something can happen at a higher
			 * level.
			 */
			if (!amdgpu_vm_pt_ancestor(&cursor))
				return -EINVAL;

			pt = cursor.entry->bo;
			shift = parent_shift;
			frag_end = max(frag_end, ALIGN(frag_start + 1,
				   1ULL << shift));
		}

		/* Looks good so far, calculate parameters for the update */
		incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
		mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
		pe_start = ((cursor.pfn >> shift) & mask) * 8;
		entry_end = ((uint64_t)mask + 1) << shift;
		entry_end += cursor.pfn & ~(entry_end - 1);
		entry_end = min(entry_end, end);

		do {
			struct amdgpu_vm *vm = params->vm;
			uint64_t upd_end = min(entry_end, frag_end);
			unsigned int nptes = (upd_end - frag_start) >> shift;
			uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);

			/* This can happen when we set higher level PDs to
			 * silent to stop fault floods.
			 */
			nptes = max(nptes, 1u);

			trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
						    min(nptes, 32u), dst, incr,
						    upd_flags,
						    vm->task_info.tgid,
						    vm->immediate.fence_context);
			amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
						   cursor.level, pe_start, dst,
						   nptes, incr, upd_flags);

			pe_start += nptes * 8;
			dst += nptes * incr;

			frag_start = upd_end;
			if (frag_start >= frag_end) {
				/* figure out the next fragment */
				amdgpu_vm_pte_fragment(params, frag_start, end,
						       flags, &frag, &frag_end);
				if (frag < shift)
					break;
			}
		} while (frag_start < entry_end);

		if (amdgpu_vm_pt_descendant(adev, &cursor)) {
			/* Free all child entries.
			 * Update the tables with the flags and addresses and free up subsequent
			 * tables in the case of huge pages or freed up areas.
			 * This is the maximum you can free, because all other page tables are not
			 * completely covered by the range and so potentially still in use.
			 */
			while (cursor.pfn < frag_start) {
				/* Make sure previous mapping is freed */
				if (cursor.entry->bo) {
					params->table_freed = true;
					amdgpu_vm_pt_free_dfs(adev, params->vm,
							      &cursor,
							      params->unlocked);
				}
				amdgpu_vm_pt_next(adev, &cursor);
			}

		} else if (frag >= shift) {
			/* or just move on to the next on the same level. */
			amdgpu_vm_pt_next(adev, &cursor);
		}
	}

	return 0;
}
/**
 * amdgpu_vm_pt_map_tables - have bo of root PD cpu accessible
 * @adev: amdgpu device structure
 * @vm: amdgpu vm structure
 *
 * make root page directory and everything below it cpu accessible.
 */
int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	struct amdgpu_vm_pt_cursor cursor;
	struct amdgpu_vm_bo_base *entry;

	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {

		struct amdgpu_bo_vm *bo;
		int r;

		if (entry->bo) {
			bo = to_amdgpu_bo_vm(entry->bo);
			r = vm->update_funcs->map_table(bo);
			if (r)
				return r;
		}
	}

	return 0;
}