drm/amdgpu: track bo memory stats at runtime
author Yunxiang Li <Yunxiang.Li@amd.com>
	Thu, 19 Dec 2024 15:14:11 +0000 (10:14 -0500)
committer Christian König <christian.koenig@amd.com>
	Thu, 19 Dec 2024 15:56:28 +0000 (16:56 +0100)
Before, every time fdinfo is queried we try to lock all the BOs in the
VM and calculate memory usage from scratch. This works okay if fdinfo is
rarely read and the VMs don't have a ton of BOs. If either of these
conditions is not true, we get a massive performance hit.

In this new revision, we track the BOs as they change state. This way,
when fdinfo is queried, we only need to take the status lock and copy out
the usage stats, with minimal impact on runtime performance. With this new
approach, however, we can no longer track active buffers.

Signed-off-by: Yunxiang Li <Yunxiang.Li@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241219151411.1150-6-Yunxiang.Li@amd.com
Signed-off-by: Christian König <christian.koenig@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c

index b144404902255d1bb9f19ed578ddb6c167715921..9f627caedc3f6144a8b8e679ab67ee049cebbb4b 100644 (file)
@@ -36,6 +36,7 @@
 #include "amdgpu_gem.h"
 #include "amdgpu_dma_buf.h"
 #include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
 #include <drm/amdgpu_drm.h>
 #include <drm/ttm/ttm_tt.h>
 #include <linux/dma-buf.h>
@@ -60,6 +61,8 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
        if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
                attach->peer2peer = false;
 
+       amdgpu_vm_bo_update_shared(bo);
+
        return 0;
 }
 
index 7717e3e4f05b5a3d1d5a2a81714e075fba8c8e4c..91d638098889d61c2460b3cb2e16ff440fe5beaf 100644 (file)
@@ -60,7 +60,7 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
        struct amdgpu_fpriv *fpriv = file->driver_priv;
        struct amdgpu_vm *vm = &fpriv->vm;
 
-       struct amdgpu_mem_stats stats[__AMDGPU_PL_LAST + 1] = { };
+       struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
        ktime_t usage[AMDGPU_HW_IP_NUM];
        const char *pl_name[] = {
                [TTM_PL_VRAM] = "vram",
@@ -72,15 +72,8 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
                [AMDGPU_PL_DOORBELL] = "doorbell",
        };
        unsigned int hw_ip, i;
-       int ret;
-
-       ret = amdgpu_bo_reserve(vm->root.bo, false);
-       if (ret)
-               return;
-
-       amdgpu_vm_get_memory(vm, stats, ARRAY_SIZE(stats));
-       amdgpu_bo_unreserve(vm->root.bo);
 
+       amdgpu_vm_get_memory(vm, stats);
        amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
 
        /*
@@ -97,7 +90,6 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
 
                drm_print_memory_stats(p,
                                       &stats[i].drm,
-                                      DRM_GEM_OBJECT_ACTIVE |
                                       DRM_GEM_OBJECT_RESIDENT |
                                       DRM_GEM_OBJECT_PURGEABLE,
                                       pl_name[i]);
@@ -115,9 +107,11 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
        drm_printf(p, "amd-evicted-vram:\t%llu KiB\n",
                   stats[TTM_PL_VRAM].evicted/1024UL);
        drm_printf(p, "amd-requested-vram:\t%llu KiB\n",
-                  stats[TTM_PL_VRAM].requested/1024UL);
+                  (stats[TTM_PL_VRAM].drm.shared +
+                   stats[TTM_PL_VRAM].drm.private) / 1024UL);
        drm_printf(p, "amd-requested-gtt:\t%llu KiB\n",
-                  stats[TTM_PL_TT].requested/1024UL);
+                  (stats[TTM_PL_TT].drm.shared +
+                   stats[TTM_PL_TT].drm.private) / 1024UL);
 
        for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
                if (!usage[hw_ip])
index bc1ad6cdf03649b21a842abbe92a87c82b254982..103513b1d23fb9b1069902e26adfd8ee7df1d243 100644 (file)
@@ -42,6 +42,7 @@
 #include "amdgpu_dma_buf.h"
 #include "amdgpu_hmm.h"
 #include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
 
 static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
 {
@@ -179,6 +180,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
        if (r)
                return r;
 
+       amdgpu_vm_bo_update_shared(abo);
        bo_va = amdgpu_vm_bo_find(vm, abo);
        if (!bo_va)
                bo_va = amdgpu_vm_bo_add(adev, vm, abo);
@@ -252,6 +254,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
                goto out_unlock;
 
        amdgpu_vm_bo_del(adev, bo_va);
+       amdgpu_vm_bo_update_shared(bo);
        if (!amdgpu_vm_ready(vm))
                goto out_unlock;
 
index a9aaf8f7cb4f1faa38b61726672079b7563af378..fc94b8b9b86d571f233f7e7012a7d902ffbd7b95 100644 (file)
@@ -1157,7 +1157,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
                return;
 
        abo = ttm_to_amdgpu_bo(bo);
-       amdgpu_vm_bo_invalidate(abo, evict);
+       amdgpu_vm_bo_move(abo, new_mem, evict);
 
        amdgpu_bo_kunmap(abo);
 
@@ -1170,75 +1170,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
                             old_mem ? old_mem->mem_type : -1);
 }
 
-void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
-                         struct amdgpu_mem_stats *stats,
-                         unsigned int sz)
-{
-       const unsigned int domain_to_pl[] = {
-               [ilog2(AMDGPU_GEM_DOMAIN_CPU)]      = TTM_PL_SYSTEM,
-               [ilog2(AMDGPU_GEM_DOMAIN_GTT)]      = TTM_PL_TT,
-               [ilog2(AMDGPU_GEM_DOMAIN_VRAM)]     = TTM_PL_VRAM,
-               [ilog2(AMDGPU_GEM_DOMAIN_GDS)]      = AMDGPU_PL_GDS,
-               [ilog2(AMDGPU_GEM_DOMAIN_GWS)]      = AMDGPU_PL_GWS,
-               [ilog2(AMDGPU_GEM_DOMAIN_OA)]       = AMDGPU_PL_OA,
-               [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = AMDGPU_PL_DOORBELL,
-       };
-       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-       struct ttm_resource *res = bo->tbo.resource;
-       struct drm_gem_object *obj = &bo->tbo.base;
-       uint64_t size = amdgpu_bo_size(bo);
-       unsigned int type;
-
-       if (!res) {
-               /*
-                * If no backing store use one of the preferred domain for basic
-                * stats. We take the MSB since that should give a reasonable
-                * view.
-                */
-               BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT ||
-                            TTM_PL_VRAM < TTM_PL_SYSTEM);
-               type = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK);
-               if (!type)
-                       return;
-               type--;
-               if (drm_WARN_ON_ONCE(&adev->ddev,
-                                    type >= ARRAY_SIZE(domain_to_pl)))
-                       return;
-               type = domain_to_pl[type];
-       } else {
-               type = res->mem_type;
-       }
-
-       if (drm_WARN_ON_ONCE(&adev->ddev, type >= sz))
-               return;
-
-       /* DRM stats common fields: */
-
-       if (drm_gem_object_is_shared_for_memory_stats(obj))
-               stats[type].drm.shared += size;
-       else
-               stats[type].drm.private += size;
-
-       if (res) {
-               stats[type].drm.resident += size;
-
-               if (!dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_BOOKKEEP))
-                       stats[type].drm.active += size;
-               else if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
-                       stats[type].drm.purgeable += size;
-       }
-
-       /* amdgpu specific stats: */
-
-       if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) {
-               stats[TTM_PL_VRAM].requested += size;
-               if (type != TTM_PL_VRAM)
-                       stats[TTM_PL_VRAM].evicted += size;
-       } else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) {
-               stats[TTM_PL_TT].requested += size;
-       }
-}
-
 /**
  * amdgpu_bo_release_notify - notification about a BO being released
  * @bo: pointer to a buffer object
@@ -1453,6 +1384,45 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
        return amdgpu_gmc_sign_extend(offset);
 }
 
+/**
+ * amdgpu_bo_mem_stats_placement - bo placement for memory accounting
+ * @bo:        the buffer object we should look at
+ *
+ * A BO can have multiple preferred placements; to avoid double counting we
+ * file it under a single placement for memory stats.
+ * Luckily, if we take the highest set bit in preferred_domains the result is
+ * quite sensible.
+ *
+ * Returns:
+ * Which of the placements should the BO be accounted under.
+ */
+uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo)
+{
+       uint32_t domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK;
+
+       if (!domain)
+               return TTM_PL_SYSTEM;
+
+       switch (rounddown_pow_of_two(domain)) {
+       case AMDGPU_GEM_DOMAIN_CPU:
+               return TTM_PL_SYSTEM;
+       case AMDGPU_GEM_DOMAIN_GTT:
+               return TTM_PL_TT;
+       case AMDGPU_GEM_DOMAIN_VRAM:
+               return TTM_PL_VRAM;
+       case AMDGPU_GEM_DOMAIN_GDS:
+               return AMDGPU_PL_GDS;
+       case AMDGPU_GEM_DOMAIN_GWS:
+               return AMDGPU_PL_GWS;
+       case AMDGPU_GEM_DOMAIN_OA:
+               return AMDGPU_PL_OA;
+       case AMDGPU_GEM_DOMAIN_DOORBELL:
+               return AMDGPU_PL_DOORBELL;
+       default:
+               return TTM_PL_SYSTEM;
+       }
+}
+
 /**
  * amdgpu_bo_get_preferred_domain - get preferred domain
  * @adev: amdgpu device object
index be6769852ece4d752744a7117f8d818b7733873a..23d2c6ab9d627eabbd8ffd61304705b645c9e3c7 100644 (file)
@@ -300,9 +300,7 @@ int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv,
 int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr);
 u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
 u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
-void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
-                         struct amdgpu_mem_stats *stats,
-                         unsigned int size);
+uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo);
 uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
                                            uint32_t domain);
 
index 2852a6064c9ac5f5b75481c010dda1cce29a8ce8..461fb8090ae04083326c460f48f96d6ae1485f3b 100644 (file)
 
 #include <linux/dma-direction.h>
 #include <drm/gpu_scheduler.h>
+#include <drm/ttm/ttm_placement.h>
 #include "amdgpu_vram_mgr.h"
-#include "amdgpu.h"
 
 #define AMDGPU_PL_GDS          (TTM_PL_PRIV + 0)
 #define AMDGPU_PL_GWS          (TTM_PL_PRIV + 1)
 #define AMDGPU_PL_OA           (TTM_PL_PRIV + 2)
 #define AMDGPU_PL_PREEMPT      (TTM_PL_PRIV + 3)
 #define AMDGPU_PL_DOORBELL     (TTM_PL_PRIV + 4)
-#define __AMDGPU_PL_LAST       (TTM_PL_PRIV + 4)
+#define __AMDGPU_PL_NUM        (TTM_PL_PRIV + 5)
 
 #define AMDGPU_GTT_MAX_TRANSFER_SIZE   512
 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS        2
index d1e3d9efbf20a2d0821a172cfeaba23324cca9a8..40a51bd128c70abaf7eb2e1c055a876588808be5 100644 (file)
@@ -36,6 +36,7 @@
 #include <drm/ttm/ttm_tt.h>
 #include <drm/drm_exec.h>
 #include "amdgpu.h"
+#include "amdgpu_vm.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_gmc.h"
@@ -310,6 +311,111 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
        spin_unlock(&vm->status_lock);
 }
 
+/**
+ * amdgpu_vm_update_shared - helper to update shared memory stat
+ * @base: base structure for tracking BO usage in a VM
+ *
+ * Takes the vm status_lock and updates the shared memory stat. If the basic
+ * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats needs to be
+ * called as well.
+ */
+static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
+{
+       struct amdgpu_vm *vm = base->vm;
+       struct amdgpu_bo *bo = base->bo;
+       uint64_t size = amdgpu_bo_size(bo);
+       uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
+       bool shared;
+
+       spin_lock(&vm->status_lock);
+       shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
+       if (base->shared != shared) {
+               base->shared = shared;
+               if (shared) {
+                       vm->stats[bo_memtype].drm.shared += size;
+                       vm->stats[bo_memtype].drm.private -= size;
+               } else {
+                       vm->stats[bo_memtype].drm.shared -= size;
+                       vm->stats[bo_memtype].drm.private += size;
+               }
+       }
+       spin_unlock(&vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_update_shared - callback when bo gets shared/unshared
+ * @bo: amdgpu buffer object
+ *
+ * Update the per-VM stats in every VM the BO belongs to, if it changed
+ * from private to shared or vice versa.
+ */
+void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo)
+{
+       struct amdgpu_vm_bo_base *base;
+
+       for (base = bo->vm_bo; base; base = base->next)
+               amdgpu_vm_update_shared(base);
+}
+
+/**
+ * amdgpu_vm_update_stats_locked - helper to update normal memory stat
+ * @base: base structure for tracking BO usage in a VM
+ * @res:  the ttm_resource to use for the purpose of accounting, may or may not
+ *        be bo->tbo.resource
+ * @sign: if we should add (+1) or subtract (-1) from the stat
+ *
+ * Caller needs to hold the vm status_lock. Useful when multiple updates
+ * need to happen at the same time.
+ */
+static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
+                           struct ttm_resource *res, int sign)
+{
+       struct amdgpu_vm *vm = base->vm;
+       struct amdgpu_bo *bo = base->bo;
+       int64_t size = sign * amdgpu_bo_size(bo);
+       uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
+
+       /* For drm-total- and drm-shared-, BOs are accounted by their preferred
+        * placement, see also amdgpu_bo_mem_stats_placement.
+        */
+       if (base->shared)
+               vm->stats[bo_memtype].drm.shared += size;
+       else
+               vm->stats[bo_memtype].drm.private += size;
+
+       if (res && res->mem_type < __AMDGPU_PL_NUM) {
+               uint32_t res_memtype = res->mem_type;
+
+               vm->stats[res_memtype].drm.resident += size;
+               /* BO only count as purgeable if it is resident,
+                * since otherwise there's nothing to purge.
+                */
+               if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
+                       vm->stats[res_memtype].drm.purgeable += size;
+               if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype)))
+                       vm->stats[bo_memtype].evicted += size;
+       }
+}
+
+/**
+ * amdgpu_vm_update_stats - helper to update normal memory stat
+ * @base: base structure for tracking BO usage in a VM
+ * @res:  the ttm_resource to use for the purpose of accounting, may or may not
+ *        be bo->tbo.resource
+ * @sign: if we should add (+1) or subtract (-1) from the stat
+ *
+ * Updates the basic memory stat when bo is added/deleted/moved.
+ */
+void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
+                           struct ttm_resource *res, int sign)
+{
+       struct amdgpu_vm *vm = base->vm;
+
+       spin_lock(&vm->status_lock);
+       amdgpu_vm_update_stats_locked(base, res, sign);
+       spin_unlock(&vm->status_lock);
+}
+
 /**
  * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
  *
@@ -333,6 +439,11 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
        base->next = bo->vm_bo;
        bo->vm_bo = base;
 
+       spin_lock(&vm->status_lock);
+       base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
+       amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1);
+       spin_unlock(&vm->status_lock);
+
        if (!amdgpu_vm_is_bo_always_valid(vm, bo))
                return;
 
@@ -1082,53 +1193,11 @@ error_free:
        return r;
 }
 
-static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va,
-                                   struct amdgpu_mem_stats *stats,
-                                   unsigned int size)
-{
-       struct amdgpu_vm *vm = bo_va->base.vm;
-       struct amdgpu_bo *bo = bo_va->base.bo;
-
-       if (!bo)
-               return;
-
-       /*
-        * For now ignore BOs which are currently locked and potentially
-        * changing their location.
-        */
-       if (!amdgpu_vm_is_bo_always_valid(vm, bo) &&
-           !dma_resv_trylock(bo->tbo.base.resv))
-               return;
-
-       amdgpu_bo_get_memory(bo, stats, size);
-       if (!amdgpu_vm_is_bo_always_valid(vm, bo))
-               dma_resv_unlock(bo->tbo.base.resv);
-}
-
 void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
-                         struct amdgpu_mem_stats *stats,
-                         unsigned int size)
+                         struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM])
 {
-       struct amdgpu_bo_va *bo_va, *tmp;
-
        spin_lock(&vm->status_lock);
-       list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status)
-               amdgpu_vm_bo_get_memory(bo_va, stats, size);
-
-       list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status)
-               amdgpu_vm_bo_get_memory(bo_va, stats, size);
-
-       list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
-               amdgpu_vm_bo_get_memory(bo_va, stats, size);
-
-       list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status)
-               amdgpu_vm_bo_get_memory(bo_va, stats, size);
-
-       list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status)
-               amdgpu_vm_bo_get_memory(bo_va, stats, size);
-
-       list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status)
-               amdgpu_vm_bo_get_memory(bo_va, stats, size);
+       memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM);
        spin_unlock(&vm->status_lock);
 }
 
@@ -2075,6 +2144,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
                        if (*base != &bo_va->base)
                                continue;
 
+                       amdgpu_vm_update_stats(*base, bo->tbo.resource, -1);
                        *base = bo_va->base.next;
                        break;
                }
@@ -2173,6 +2243,32 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted)
        }
 }
 
+/**
+ * amdgpu_vm_bo_move - handle BO move
+ *
+ * @bo: amdgpu buffer object
+ * @new_mem: the new placement of the BO move
+ * @evicted: whether the BO is being evicted
+ *
+ * Update the memory stats for the new placement and mark @bo as invalid.
+ */
+void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
+                      bool evicted)
+{
+       struct amdgpu_vm_bo_base *bo_base;
+
+       for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+               struct amdgpu_vm *vm = bo_base->vm;
+
+               spin_lock(&vm->status_lock);
+               amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1);
+               amdgpu_vm_update_stats_locked(bo_base, new_mem, +1);
+               spin_unlock(&vm->status_lock);
+       }
+
+       amdgpu_vm_bo_invalidate(bo, evicted);
+}
+
 /**
  * amdgpu_vm_get_block_size - calculate VM page table size as power of two
  *
@@ -2592,6 +2688,16 @@ void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        vm->is_compute_context = false;
 }
 
+static int amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm)
+{
+       for (int i = 0; i < __AMDGPU_PL_NUM; ++i) {
+               if (!(drm_memory_stats_is_zero(&vm->stats[i].drm) &&
+                     vm->stats[i].evicted == 0))
+                       return false;
+       }
+       return true;
+}
+
 /**
  * amdgpu_vm_fini - tear down a vm instance
  *
@@ -2615,7 +2721,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
        root = amdgpu_bo_ref(vm->root.bo);
        amdgpu_bo_reserve(root, true);
-       amdgpu_vm_put_task_info(vm->task_info);
        amdgpu_vm_set_pasid(adev, vm, 0);
        dma_fence_wait(vm->last_unlocked, false);
        dma_fence_put(vm->last_unlocked);
@@ -2664,6 +2769,16 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        }
 
        ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
+
+       if (!amdgpu_vm_stats_is_zero(vm)) {
+               struct amdgpu_task_info *ti = vm->task_info;
+
+               dev_warn(adev->dev,
+                        "VM memory stats for proc %s(%d) task %s(%d) is non-zero when fini\n",
+                        ti->process_name, ti->pid, ti->task_name, ti->tgid);
+       }
+
+       amdgpu_vm_put_task_info(vm->task_info);
 }
 
 /**
index 6a1b344e15e1b16f0d5ed217758f6fc489b6a89b..a3e128e373bc62233bd0361b7f95584c8ebacd15 100644 (file)
@@ -35,6 +35,7 @@
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
 #include "amdgpu_ids.h"
+#include "amdgpu_ttm.h"
 
 struct drm_exec;
 
@@ -202,9 +203,13 @@ struct amdgpu_vm_bo_base {
        /* protected by bo being reserved */
        struct amdgpu_vm_bo_base        *next;
 
-       /* protected by spinlock */
+       /* protected by vm status_lock */
        struct list_head                vm_status;
 
+       /* whether the bo is counted as shared in mem stats,
+        * protected by vm status_lock */
+       bool                            shared;
+
        /* protected by the BO being reserved */
        bool                            moved;
 };
@@ -324,10 +329,7 @@ struct amdgpu_vm_fault_info {
 struct amdgpu_mem_stats {
        struct drm_memory_stats drm;
 
-       /* buffers that requested this placement */
-       uint64_t requested;
-       /* buffers that requested this placement
-        * but are currently evicted */
+       /* buffers that requested this placement but are currently evicted */
        uint64_t evicted;
 };
 
@@ -345,6 +347,9 @@ struct amdgpu_vm {
        /* Lock to protect vm_bo add/del/move on all lists of vm */
        spinlock_t              status_lock;
 
+       /* Memory statistics for this vm, protected by status_lock */
+       struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
+
        /* Per-VM and PT BOs who needs a validation */
        struct list_head        evicted;
 
@@ -525,6 +530,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
                        bool clear);
 bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
 void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted);
+void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
+                           struct ttm_resource *new_res, int sign);
+void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo);
+void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
+                      bool evicted);
 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
                                       struct amdgpu_bo *bo);
@@ -575,8 +585,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
 void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
                                struct amdgpu_vm *vm);
 void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
-                         struct amdgpu_mem_stats *stats,
-                         unsigned int size);
+                         struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]);
 
 int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                       struct amdgpu_bo_vm *vmbo, bool immediate);
index f78a0434a48fa24d5db00a505116cb36fda768be..b0bf216821152e48965b5d970ca21c9dab9aa5a6 100644 (file)
@@ -537,6 +537,7 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
        if (!entry->bo)
                return;
 
+       amdgpu_vm_update_stats(entry, entry->bo->tbo.resource, -1);
        entry->bo->vm_bo = NULL;
        ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);