git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
drm/amdgpu: rework how PTE flags are generated v3
Author: Christian König <christian.koenig@amd.com>
Fri, 6 Jun 2025 12:13:37 +0000 (14:13 +0200)
Committer: Alex Deucher <alexander.deucher@amd.com>
Mon, 4 Aug 2025 18:26:38 +0000 (14:26 -0400)
Previously we tried to keep the HW specific PTE flags in each mapping,
but for CRIU that isn't sufficient any more since the original value is
needed for the checkpoint procedure.

So rework the whole handling, nuke the early mapping function, keep the
UAPI flags in each mapping instead of the HW flags and translate them to
the HW flags while filling in the PTEs.

Only tested on Navi 23 for now, so probably needs quite a bit of more
work.

v2: fix KFD and SVM handling
v3: one more SVM fix pointed out by Felix
v4: squash in gfx12 fix from David

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
16 files changed:
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.c

index 260165bbe3736dc369d5bfa718020ec98d1c8698..37d8a7034a7ee561f1b32a83f5f2b81c3ccb9050 100644 (file)
@@ -494,7 +494,8 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
        return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL);
 }
 
-static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
+static uint64_t get_pte_flags(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+                             struct kgd_mem *mem)
 {
        uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE |
                                 AMDGPU_VM_MTYPE_DEFAULT;
@@ -504,7 +505,7 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
        if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
                mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
 
-       return amdgpu_gem_va_map_flags(adev, mapping_flags);
+       return mapping_flags;
 }
 
 /**
@@ -961,7 +962,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
                        goto unwind;
                }
                attachment[i]->va = va;
-               attachment[i]->pte_flags = get_pte_flags(adev, mem);
+               attachment[i]->pte_flags = get_pte_flags(adev, vm, mem);
                attachment[i]->adev = adev;
                list_add(&attachment[i]->list, &mem->attachments);
 
index 6626a6e64ff5f8b67d99f30bfaee4aafee7e50fb..d5e685c5e28b909a203517a6493d21bd9edfab83 100644 (file)
@@ -790,36 +790,6 @@ error:
        return fence;
 }
 
-/**
- * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags
- *
- * @adev: amdgpu_device pointer
- * @flags: GEM UAPI flags
- *
- * Returns the GEM UAPI flags mapped into hardware for the ASIC.
- */
-uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags)
-{
-       uint64_t pte_flag = 0;
-
-       if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
-               pte_flag |= AMDGPU_PTE_EXECUTABLE;
-       if (flags & AMDGPU_VM_PAGE_READABLE)
-               pte_flag |= AMDGPU_PTE_READABLE;
-       if (flags & AMDGPU_VM_PAGE_WRITEABLE)
-               pte_flag |= AMDGPU_PTE_WRITEABLE;
-       if (flags & AMDGPU_VM_PAGE_PRT)
-               pte_flag |= AMDGPU_PTE_PRT_FLAG(adev);
-       if (flags & AMDGPU_VM_PAGE_NOALLOC)
-               pte_flag |= AMDGPU_PTE_NOALLOC;
-
-       if (adev->gmc.gmc_funcs->map_mtype)
-               pte_flag |= amdgpu_gmc_map_mtype(adev,
-                                                flags & AMDGPU_VM_MTYPE_MASK);
-
-       return pte_flag;
-}
-
 int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *filp)
 {
@@ -840,7 +810,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
        struct dma_fence_chain *timeline_chain = NULL;
        struct dma_fence *fence;
        struct drm_exec exec;
-       uint64_t va_flags;
        uint64_t vm_size;
        int r = 0;
 
@@ -944,10 +913,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 
        switch (args->operation) {
        case AMDGPU_VA_OP_MAP:
-               va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
                r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
                                     args->offset_in_bo, args->map_size,
-                                    va_flags);
+                                    args->flags);
                break;
        case AMDGPU_VA_OP_UNMAP:
                r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address);
@@ -959,10 +927,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
                                                args->map_size);
                break;
        case AMDGPU_VA_OP_REPLACE:
-               va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
                r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
                                             args->offset_in_bo, args->map_size,
-                                            va_flags);
+                                            args->flags);
                break;
        default:
                break;
index 3a8f57900a3aaf23befa1f41c373386aac589746..b51e8f95ee86d3e4067e981e0e670acb305498a0 100644 (file)
@@ -63,7 +63,6 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *filp);
 int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
                              struct drm_file *filp);
-uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags);
 int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *filp);
 int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
index 397c6ccdb9033bac752e5aa5ee8f566d428bdc0f..55097ca107382552e1de6d9cd344557a51a59f3c 100644 (file)
@@ -154,15 +154,15 @@ struct amdgpu_gmc_funcs {
                                   unsigned pasid);
        /* enable/disable PRT support */
        void (*set_prt)(struct amdgpu_device *adev, bool enable);
-       /* map mtype to hardware flags */
-       uint64_t (*map_mtype)(struct amdgpu_device *adev, uint32_t flags);
        /* get the pde for a given mc addr */
        void (*get_vm_pde)(struct amdgpu_device *adev, int level,
                           u64 *dst, u64 *flags);
-       /* get the pte flags to use for a BO VA mapping */
+       /* get the pte flags to use for PTEs */
        void (*get_vm_pte)(struct amdgpu_device *adev,
-                          struct amdgpu_bo_va_mapping *mapping,
-                          uint64_t *flags);
+                          struct amdgpu_vm *vm,
+                          struct amdgpu_bo *bo,
+                          uint32_t vm_flags,
+                          uint64_t *pte_flags);
        /* override per-page pte flags */
        void (*override_vm_pte_flags)(struct amdgpu_device *dev,
                                      struct amdgpu_vm *vm,
@@ -356,9 +356,10 @@ struct amdgpu_gmc {
 
 #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
 #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
-#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
 #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
-#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
+#define amdgpu_gmc_get_vm_pte(adev, vm, bo, vm_flags, pte_flags) \
+       ((adev)->gmc.gmc_funcs->get_vm_pte((adev), (vm), (bo), (vm_flags), \
+                                          (pte_flags)))
 #define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags)    \
        (adev)->gmc.gmc_funcs->override_vm_pte_flags                    \
                ((adev), (vm), (addr), (pte_flags))
index c316920f34509e630deb5fc97ab2346eae2f4aeb..87523fcd438639b55cfd26076f6564b859fdcabd 100644 (file)
@@ -69,7 +69,7 @@ struct amdgpu_bo_va_mapping {
        uint64_t                        last;
        uint64_t                        __subtree_last;
        uint64_t                        offset;
-       uint64_t                        flags;
+       uint32_t                        flags;
 };
 
 /* User space allocated BO in a VM */
index d45ebfb642ca94f630f7bdbf436b946138668015..a0b479d5fff1910e9ca31e2a899061af536e60d4 100644 (file)
@@ -67,9 +67,9 @@ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev)
 int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                     struct amdgpu_bo_va **bo_va)
 {
-       u64 seq64_addr, va_flags;
        struct amdgpu_bo *bo;
        struct drm_exec exec;
+       u64 seq64_addr;
        int r;
 
        bo = adev->seq64.sbo;
@@ -94,9 +94,9 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
        seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK;
 
-       va_flags = amdgpu_gem_va_map_flags(adev, AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC);
-       r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE,
-                            va_flags);
+       r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0,
+                            AMDGPU_VA_RESERVED_SEQ64_SIZE,
+                            AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC);
        if (r) {
                DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r);
                amdgpu_vm_bo_del(adev, *bo_va);
index 5cacf5717016aaa64968be485805e0c48f88cb10..39b4250ede0ffb54e11c214628bd17af4eb372f6 100644 (file)
@@ -1332,13 +1332,14 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
                /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
                 * but in case of something, we filter the flags in first place
                 */
-               if (!(mapping->flags & AMDGPU_PTE_READABLE))
+               if (!(mapping->flags & AMDGPU_VM_PAGE_READABLE))
                        update_flags &= ~AMDGPU_PTE_READABLE;
-               if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
+               if (!(mapping->flags & AMDGPU_VM_PAGE_WRITEABLE))
                        update_flags &= ~AMDGPU_PTE_WRITEABLE;
 
                /* Apply ASIC specific mapping flags */
-               amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags);
+               amdgpu_gmc_get_vm_pte(adev, vm, bo, mapping->flags,
+                                     &update_flags);
 
                trace_amdgpu_vm_bo_update(mapping);
 
@@ -1479,7 +1480,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
                                   struct amdgpu_bo_va_mapping *mapping,
                                   struct dma_fence *fence)
 {
-       if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev))
+       if (mapping->flags & AMDGPU_VM_PAGE_PRT)
                amdgpu_vm_add_prt_cb(adev, fence);
        kfree(mapping);
 }
@@ -1758,7 +1759,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
        list_add(&mapping->list, &bo_va->invalids);
        amdgpu_vm_it_insert(mapping, &vm->va);
 
-       if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev))
+       if (mapping->flags & AMDGPU_VM_PAGE_PRT)
                amdgpu_vm_prt_get(adev);
 
        if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
@@ -1818,7 +1819,7 @@ static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev,
 int amdgpu_vm_bo_map(struct amdgpu_device *adev,
                     struct amdgpu_bo_va *bo_va,
                     uint64_t saddr, uint64_t offset,
-                    uint64_t size, uint64_t flags)
+                    uint64_t size, uint32_t flags)
 {
        struct amdgpu_bo_va_mapping *mapping, *tmp;
        struct amdgpu_bo *bo = bo_va->base.bo;
@@ -1877,7 +1878,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
                             struct amdgpu_bo_va *bo_va,
                             uint64_t saddr, uint64_t offset,
-                            uint64_t size, uint64_t flags)
+                            uint64_t size, uint32_t flags)
 {
        struct amdgpu_bo_va_mapping *mapping;
        struct amdgpu_bo *bo = bo_va->base.bo;
@@ -2734,7 +2735,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        dma_fence_put(vm->last_tlb_flush);
 
        list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
-               if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev) && prt_fini_needed) {
+               if (mapping->flags & AMDGPU_VM_PAGE_PRT && prt_fini_needed) {
                        amdgpu_vm_prt_fini(adev, vm);
                        prt_fini_needed = false;
                }
index fd086efd8457e39bf74555a3eff90fb6cea60df1..3b4fa3246675fcc6264e28d46f9761b42f7b2461 100644 (file)
@@ -538,11 +538,11 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 int amdgpu_vm_bo_map(struct amdgpu_device *adev,
                     struct amdgpu_bo_va *bo_va,
                     uint64_t addr, uint64_t offset,
-                    uint64_t size, uint64_t flags);
+                    uint64_t size, uint32_t flags);
 int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
                             struct amdgpu_bo_va *bo_va,
                             uint64_t addr, uint64_t offset,
-                            uint64_t size, uint64_t flags);
+                            uint64_t size, uint32_t flags);
 int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
                       struct amdgpu_bo_va *bo_va,
                       uint64_t addr);
index 7923f491cf7335d7a7e224520e0564f03b567bac..7031dd8c3c5eb84d1f4ccbd30ea8a77799ae4e1e 100644 (file)
@@ -466,24 +466,6 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int
  * 0 valid
  */
 
-static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
-{
-       switch (flags) {
-       case AMDGPU_VM_MTYPE_DEFAULT:
-               return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
-       case AMDGPU_VM_MTYPE_NC:
-               return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
-       case AMDGPU_VM_MTYPE_WC:
-               return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_WC);
-       case AMDGPU_VM_MTYPE_CC:
-               return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_CC);
-       case AMDGPU_VM_MTYPE_UC:
-               return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC);
-       default:
-               return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
-       }
-}
-
 static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
                                 uint64_t *addr, uint64_t *flags)
 {
@@ -508,21 +490,39 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
 }
 
 static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
-                                struct amdgpu_bo_va_mapping *mapping,
+                                struct amdgpu_vm *vm,
+                                struct amdgpu_bo *bo,
+                                uint32_t vm_flags,
                                 uint64_t *flags)
 {
-       struct amdgpu_bo *bo = mapping->bo_va->base.bo;
-
-       *flags &= ~AMDGPU_PTE_EXECUTABLE;
-       *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+       if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+               *flags |= AMDGPU_PTE_EXECUTABLE;
+       else
+               *flags &= ~AMDGPU_PTE_EXECUTABLE;
 
-       *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
-       *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
+       switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+       case AMDGPU_VM_MTYPE_DEFAULT:
+       case AMDGPU_VM_MTYPE_NC:
+       default:
+               *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_NC);
+               break;
+       case AMDGPU_VM_MTYPE_WC:
+               *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_WC);
+               break;
+       case AMDGPU_VM_MTYPE_CC:
+               *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_CC);
+               break;
+       case AMDGPU_VM_MTYPE_UC:
+               *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
+               break;
+       }
 
-       *flags &= ~AMDGPU_PTE_NOALLOC;
-       *flags |= (mapping->flags & AMDGPU_PTE_NOALLOC);
+       if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
+               *flags |= AMDGPU_PTE_NOALLOC;
+       else
+               *flags &= ~AMDGPU_PTE_NOALLOC;
 
-       if (mapping->flags & AMDGPU_PTE_PRT) {
+       if (vm_flags & AMDGPU_VM_PAGE_PRT) {
                *flags |= AMDGPU_PTE_PRT;
                *flags |= AMDGPU_PTE_SNOOPED;
                *flags |= AMDGPU_PTE_LOG;
@@ -563,7 +563,6 @@ static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
        .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
        .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
        .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
-       .map_mtype = gmc_v10_0_map_mtype,
        .get_vm_pde = gmc_v10_0_get_vm_pde,
        .get_vm_pte = gmc_v10_0_get_vm_pte,
        .get_vbios_fb_size = gmc_v10_0_get_vbios_fb_size,
index f15d691e9a2031554510c2a2fd83ca8db17f7bf8..93d2b0bbe64190bba32f081a7e783b6bffc7ceb7 100644 (file)
@@ -430,24 +430,6 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int
  * 0 valid
  */
 
-static uint64_t gmc_v11_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
-{
-       switch (flags) {
-       case AMDGPU_VM_MTYPE_DEFAULT:
-               return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
-       case AMDGPU_VM_MTYPE_NC:
-               return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
-       case AMDGPU_VM_MTYPE_WC:
-               return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_WC);
-       case AMDGPU_VM_MTYPE_CC:
-               return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_CC);
-       case AMDGPU_VM_MTYPE_UC:
-               return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC);
-       default:
-               return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
-       }
-}
-
 static void gmc_v11_0_get_vm_pde(struct amdgpu_device *adev, int level,
                                 uint64_t *addr, uint64_t *flags)
 {
@@ -472,21 +454,39 @@ static void gmc_v11_0_get_vm_pde(struct amdgpu_device *adev, int level,
 }
 
 static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
-                                struct amdgpu_bo_va_mapping *mapping,
+                                struct amdgpu_vm *vm,
+                                struct amdgpu_bo *bo,
+                                uint32_t vm_flags,
                                 uint64_t *flags)
 {
-       struct amdgpu_bo *bo = mapping->bo_va->base.bo;
-
-       *flags &= ~AMDGPU_PTE_EXECUTABLE;
-       *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+       if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+               *flags |= AMDGPU_PTE_EXECUTABLE;
+       else
+               *flags &= ~AMDGPU_PTE_EXECUTABLE;
 
-       *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
-       *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
+       switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+       case AMDGPU_VM_MTYPE_DEFAULT:
+       case AMDGPU_VM_MTYPE_NC:
+       default:
+               *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_NC);
+               break;
+       case AMDGPU_VM_MTYPE_WC:
+               *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_WC);
+               break;
+       case AMDGPU_VM_MTYPE_CC:
+               *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_CC);
+               break;
+       case AMDGPU_VM_MTYPE_UC:
+               *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
+               break;
+       }
 
-       *flags &= ~AMDGPU_PTE_NOALLOC;
-       *flags |= (mapping->flags & AMDGPU_PTE_NOALLOC);
+       if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
+               *flags |= AMDGPU_PTE_NOALLOC;
+       else
+               *flags &= ~AMDGPU_PTE_NOALLOC;
 
-       if (mapping->flags & AMDGPU_PTE_PRT) {
+       if (vm_flags & AMDGPU_VM_PAGE_PRT) {
                *flags |= AMDGPU_PTE_PRT;
                *flags |= AMDGPU_PTE_SNOOPED;
                *flags |= AMDGPU_PTE_LOG;
@@ -527,7 +527,6 @@ static const struct amdgpu_gmc_funcs gmc_v11_0_gmc_funcs = {
        .flush_gpu_tlb_pasid = gmc_v11_0_flush_gpu_tlb_pasid,
        .emit_flush_gpu_tlb = gmc_v11_0_emit_flush_gpu_tlb,
        .emit_pasid_mapping = gmc_v11_0_emit_pasid_mapping,
-       .map_mtype = gmc_v11_0_map_mtype,
        .get_vm_pde = gmc_v11_0_get_vm_pde,
        .get_vm_pte = gmc_v11_0_get_vm_pte,
        .get_vbios_fb_size = gmc_v11_0_get_vbios_fb_size,
index de763105fdfd162a9f5de42766b02e39bf32decc..a0a5367f9dc40e560e2922dcd179a3b981af4830 100644 (file)
@@ -453,20 +453,6 @@ static void gmc_v12_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
  * 0 valid
  */
 
-static uint64_t gmc_v12_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
-{
-       switch (flags) {
-       case AMDGPU_VM_MTYPE_DEFAULT:
-               return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_NC);
-       case AMDGPU_VM_MTYPE_NC:
-               return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_NC);
-       case AMDGPU_VM_MTYPE_UC:
-               return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_UC);
-       default:
-               return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_NC);
-       }
-}
-
 static void gmc_v12_0_get_vm_pde(struct amdgpu_device *adev, int level,
                                 uint64_t *addr, uint64_t *flags)
 {
@@ -490,19 +476,35 @@ static void gmc_v12_0_get_vm_pde(struct amdgpu_device *adev, int level,
 }
 
 static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev,
-                                struct amdgpu_bo_va_mapping *mapping,
+                                struct amdgpu_vm *vm,
+                                struct amdgpu_bo *bo,
+                                uint32_t vm_flags,
                                 uint64_t *flags)
 {
-       struct amdgpu_bo *bo = mapping->bo_va->base.bo;
+       if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+               *flags |= AMDGPU_PTE_EXECUTABLE;
+       else
+               *flags &= ~AMDGPU_PTE_EXECUTABLE;
 
-       *flags &= ~AMDGPU_PTE_EXECUTABLE;
-       *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+       switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+       case AMDGPU_VM_MTYPE_DEFAULT:
+               *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
+               break;
+       case AMDGPU_VM_MTYPE_NC:
+       default:
+               *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
+               break;
+       case AMDGPU_VM_MTYPE_UC:
+               *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
+               break;
+       }
 
-       *flags &= ~AMDGPU_PTE_MTYPE_GFX12_MASK;
-       *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_GFX12_MASK);
+       if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
+               *flags |= AMDGPU_PTE_NOALLOC;
+       else
+               *flags &= ~AMDGPU_PTE_NOALLOC;
 
-       if (mapping->flags & AMDGPU_PTE_PRT_GFX12) {
-               *flags |= AMDGPU_PTE_PRT_GFX12;
+       if (vm_flags & AMDGPU_VM_PAGE_PRT) {
                *flags |= AMDGPU_PTE_SNOOPED;
                *flags |= AMDGPU_PTE_SYSTEM;
                *flags |= AMDGPU_PTE_IS_PTE;
@@ -543,7 +545,6 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
        .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
        .emit_flush_gpu_tlb = gmc_v12_0_emit_flush_gpu_tlb,
        .emit_pasid_mapping = gmc_v12_0_emit_pasid_mapping,
-       .map_mtype = gmc_v12_0_map_mtype,
        .get_vm_pde = gmc_v12_0_get_vm_pde,
        .get_vm_pte = gmc_v12_0_get_vm_pte,
        .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
index 8030fcd64210674d82ae9be9d5de3d1c4b663ad0..f6ad7911f1e6f07d6e3f1970176fda9b6b4f7551 100644 (file)
@@ -382,7 +382,9 @@ static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level,
 }
 
 static void gmc_v6_0_get_vm_pte(struct amdgpu_device *adev,
-                               struct amdgpu_bo_va_mapping *mapping,
+                               struct amdgpu_vm *vm,
+                               struct amdgpu_bo *bo,
+                               uint32_t vm_flags,
                                uint64_t *flags)
 {
        *flags &= ~AMDGPU_PTE_EXECUTABLE;
index a8d5795084fc97576fa83926e107ed53806958de..93d7ccb7d013ad9eb54abe2560182de5e1a0df0d 100644 (file)
@@ -504,7 +504,9 @@ static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level,
 }
 
 static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev,
-                               struct amdgpu_bo_va_mapping *mapping,
+                               struct amdgpu_vm *vm,
+                               struct amdgpu_bo *bo,
+                               uint32_t vm_flags,
                                uint64_t *flags)
 {
        *flags &= ~AMDGPU_PTE_EXECUTABLE;
index b45fa0cea9d27dd4c783e0dfe68097b4f6c6c49b..c5e2a2c41e06555d0f6ca6f81a5649c0adea9b37 100644 (file)
@@ -716,11 +716,15 @@ static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level,
 }
 
 static void gmc_v8_0_get_vm_pte(struct amdgpu_device *adev,
-                               struct amdgpu_bo_va_mapping *mapping,
+                               struct amdgpu_vm *vm,
+                               struct amdgpu_bo *bo,
+                               uint32_t vm_flags,
                                uint64_t *flags)
 {
-       *flags &= ~AMDGPU_PTE_EXECUTABLE;
-       *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+       if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+               *flags |= AMDGPU_PTE_EXECUTABLE;
+       else
+               *flags &= ~AMDGPU_PTE_EXECUTABLE;
        *flags &= ~AMDGPU_PTE_PRT;
 }
 
index c4d69cf4e06c7e6f9a5e2088405f30d7b9719574..8404695eb13fe33326a07e133c6648a5a9b45609 100644 (file)
@@ -1073,27 +1073,6 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int v
  * 0 valid
  */
 
-static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
-
-{
-       switch (flags) {
-       case AMDGPU_VM_MTYPE_DEFAULT:
-               return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC);
-       case AMDGPU_VM_MTYPE_NC:
-               return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC);
-       case AMDGPU_VM_MTYPE_WC:
-               return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_WC);
-       case AMDGPU_VM_MTYPE_RW:
-               return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_RW);
-       case AMDGPU_VM_MTYPE_CC:
-               return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_CC);
-       case AMDGPU_VM_MTYPE_UC:
-               return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC);
-       default:
-               return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC);
-       }
-}
-
 static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
                                uint64_t *addr, uint64_t *flags)
 {
@@ -1123,6 +1102,7 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
 static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
                                         struct amdgpu_vm *vm,
                                         struct amdgpu_bo *bo,
+                                        uint32_t vm_flags,
                                         uint64_t *flags)
 {
        struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -1236,25 +1216,43 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
 }
 
 static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
-                               struct amdgpu_bo_va_mapping *mapping,
+                               struct amdgpu_vm *vm,
+                               struct amdgpu_bo *bo,
+                               uint32_t vm_flags,
                                uint64_t *flags)
 {
-       struct amdgpu_bo *bo = mapping->bo_va->base.bo;
-
-       *flags &= ~AMDGPU_PTE_EXECUTABLE;
-       *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+       if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+               *flags |= AMDGPU_PTE_EXECUTABLE;
+       else
+               *flags &= ~AMDGPU_PTE_EXECUTABLE;
 
-       *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
-       *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;
+       switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+       case AMDGPU_VM_MTYPE_DEFAULT:
+       case AMDGPU_VM_MTYPE_NC:
+       default:
+               *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_NC);
+               break;
+       case AMDGPU_VM_MTYPE_WC:
+               *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_WC);
+               break;
+       case AMDGPU_VM_MTYPE_RW:
+               *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_RW);
+               break;
+       case AMDGPU_VM_MTYPE_CC:
+               *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
+               break;
+       case AMDGPU_VM_MTYPE_UC:
+               *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_UC);
+               break;
+       }
 
-       if (mapping->flags & AMDGPU_PTE_PRT) {
+       if (vm_flags & AMDGPU_VM_PAGE_PRT) {
                *flags |= AMDGPU_PTE_PRT;
                *flags &= ~AMDGPU_PTE_VALID;
        }
 
        if ((*flags & AMDGPU_PTE_VALID) && bo)
-               gmc_v9_0_get_coherence_flags(adev, mapping->bo_va->base.vm, bo,
-                                            flags);
+               gmc_v9_0_get_coherence_flags(adev, vm, bo, vm_flags, flags);
 }
 
 static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
@@ -1391,7 +1389,6 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
        .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
        .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
        .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
-       .map_mtype = gmc_v9_0_map_mtype,
        .get_vm_pde = gmc_v9_0_get_vm_pde,
        .get_vm_pte = gmc_v9_0_get_vm_pte,
        .override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags,
index a0f22ea6d15af73b9c6cacb781a7354b2791f32a..e23b5a0f31f2bcedb1e543f6845631125180183d 100644 (file)
@@ -1189,7 +1189,7 @@ svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)
 }
 
 static uint64_t
-svm_range_get_pte_flags(struct kfd_node *node,
+svm_range_get_pte_flags(struct kfd_node *node, struct amdgpu_vm *vm,
                        struct svm_range *prange, int domain)
 {
        struct kfd_node *bo_node;
@@ -1292,10 +1292,6 @@ svm_range_get_pte_flags(struct kfd_node *node,
                        AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
        }
 
-       mapping_flags |= AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE;
-
-       if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO)
-               mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE;
        if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC)
                mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
 
@@ -1305,7 +1301,10 @@ svm_range_get_pte_flags(struct kfd_node *node,
        if (gc_ip_version >= IP_VERSION(12, 0, 0))
                pte_flags |= AMDGPU_PTE_IS_PTE;
 
-       pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags);
+       amdgpu_gmc_get_vm_pte(node->adev, vm, NULL, mapping_flags, &pte_flags);
+       pte_flags |= AMDGPU_PTE_READABLE;
+       if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
+               pte_flags |= AMDGPU_PTE_WRITEABLE;
        return pte_flags;
 }
 
@@ -1412,7 +1411,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
                pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
                         last_start, prange->start + i, last_domain ? "GPU" : "CPU");
 
-               pte_flags = svm_range_get_pte_flags(pdd->dev, prange, last_domain);
+               pte_flags = svm_range_get_pte_flags(pdd->dev, vm, prange, last_domain);
                if (readonly)
                        pte_flags &= ~AMDGPU_PTE_WRITEABLE;