]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu: statically assign gart windows to ttm entities
authorPierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Tue, 18 Nov 2025 13:58:43 +0000 (14:58 +0100)
committerAlex Deucher <alexander.deucher@amd.com>
Mon, 23 Feb 2026 19:16:29 +0000 (14:16 -0500)
If multiple entities share the same window, we must make sure
that jobs using it are executed sequentially.

This commit gives separate windows to each entity, so jobs
from multiple entities can execute in parallel if needed.
(for now they all use the first sdma engine, so it makes no
difference yet).
The entity stores the gart window offsets to centralize the
"window id" to "window offset" mapping in a single place.

default_entity doesn't get any windows reserved since there is
no use for them.

---
v3:
- renamed gart_window_lock -> lock (Christian)
- added amdgpu_ttm_buffer_entity_init (Christian)
- fixed gart_addr in svm_migrate_gart_map (Felix)
- renamed gart_window_idX -> gart_window_offs[]
- added amdgpu_compute_gart_address
v4:
- u32 -> u64
- added kerneldoc
v5:
- removed gtt_window_lock
- simplified gart window creation and use: entities using a
  single window now use window #0 instead of #1
- fix dst_addr calculation in kfd_migrate.c
---

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Acked-by: Felix Kuehling <felix.kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c

index 5179fa008626e06413e81decb51bc2b2aeddd6f8..a0940db1cd36c8ea82d367977346fb4fd5d9d058 100644 (file)
@@ -742,7 +742,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
         * translation. Avoid this by doing the invalidation from the SDMA
         * itself at least for GART.
         */
-       mutex_lock(&adev->mman.gtt_window_lock);
+       mutex_lock(&adev->mman.default_entity.lock);
        r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.default_entity.base,
                                     AMDGPU_FENCE_OWNER_UNDEFINED,
                                     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
@@ -755,7 +755,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
        job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
        fence = amdgpu_job_submit(job);
-       mutex_unlock(&adev->mman.gtt_window_lock);
+       mutex_unlock(&adev->mman.default_entity.lock);
 
        dma_fence_wait(fence, false);
        dma_fence_put(fence);
@@ -763,7 +763,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
        return;
 
 error_alloc:
-       mutex_unlock(&adev->mman.gtt_window_lock);
+       mutex_unlock(&adev->mman.default_entity.lock);
        dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
 }
 
index eeaa56c8d129faafd8113b7f741471d8c9c32bdb..3b2c0ae67ce4cbbf9db54b520510fbefa1a15695 100644 (file)
@@ -228,9 +228,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity,
 
        *size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
 
-       *addr = adev->gmc.gart_start;
-       *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
-               AMDGPU_GPU_PAGE_SIZE;
+       *addr = amdgpu_compute_gart_address(&adev->gmc, entity, window);
        *addr += offset;
 
        num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
@@ -248,7 +246,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity,
        src_addr += job->ibs[0].gpu_addr;
 
        dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
-       dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
+       dst_addr += (entity->gart_window_offs[window] >> AMDGPU_GPU_PAGE_SHIFT) * 8;
        amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
                                dst_addr, num_bytes, 0);
 
@@ -313,7 +311,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
        amdgpu_res_first(src->mem, src->offset, size, &src_mm);
        amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
 
-       mutex_lock(&adev->mman.gtt_window_lock);
+       mutex_lock(&entity->lock);
        while (src_mm.remaining) {
                uint64_t from, to, cur_size, tiling_flags;
                uint32_t num_type, data_format, max_com, write_compress_disable;
@@ -368,7 +366,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
                amdgpu_res_next(&dst_mm, cur_size);
        }
 error:
-       mutex_unlock(&adev->mman.gtt_window_lock);
+       mutex_unlock(&entity->lock);
        *f = fence;
        return r;
 }
@@ -1580,7 +1578,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
        if (r)
                goto out;
 
-       mutex_lock(&adev->mman.gtt_window_lock);
+       mutex_lock(&adev->mman.default_entity.lock);
        amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
        src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
                src_mm.start;
@@ -1592,7 +1590,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
                                PAGE_SIZE, 0);
 
        fence = amdgpu_ttm_job_submit(adev, job, num_dw);
-       mutex_unlock(&adev->mman.gtt_window_lock);
+       mutex_unlock(&adev->mman.default_entity.lock);
 
        if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
                r = -ETIMEDOUT;
@@ -2013,6 +2011,27 @@ static void amdgpu_ttm_free_mmio_remap_bo(struct amdgpu_device *adev)
        adev->rmmio_remap.bo = NULL;
 }
 
+static int amdgpu_ttm_buffer_entity_init(struct amdgpu_ttm_buffer_entity *entity,
+                                        int starting_gart_window,
+                                        u32 num_gart_windows)
+{
+       int i;
+
+       mutex_init(&entity->lock);
+
+       if (ARRAY_SIZE(entity->gart_window_offs) < num_gart_windows)
+               return starting_gart_window;
+
+       for (i = 0; i < num_gart_windows; i++) {
+               entity->gart_window_offs[i] =
+                       (u64)starting_gart_window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
+                               AMDGPU_GPU_PAGE_SIZE;
+               starting_gart_window++;
+       }
+
+       return starting_gart_window;
+}
+
 /*
  * amdgpu_ttm_init - Init the memory management (ttm) as well as various
  * gtt/vram related fields.
@@ -2027,8 +2046,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
        uint64_t gtt_size;
        int r;
 
-       mutex_init(&adev->mman.gtt_window_lock);
-
        dma_set_max_seg_size(adev->dev, UINT_MAX);
        /* No others user of address space so set it to 0 */
        r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
@@ -2302,6 +2319,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 {
        struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+       u32 used_windows;
        uint64_t size;
        int r;
 
@@ -2345,6 +2363,13 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
                        drm_sched_entity_destroy(&adev->mman.clear_entity.base);
                        goto error_free_entity;
                }
+
+               /* Statically assign GART windows to each entity. */
+               used_windows = amdgpu_ttm_buffer_entity_init(&adev->mman.default_entity, 0, 0);
+               used_windows = amdgpu_ttm_buffer_entity_init(&adev->mman.move_entity,
+                                                            used_windows, 2);
+               used_windows = amdgpu_ttm_buffer_entity_init(&adev->mman.clear_entity,
+                                                            used_windows, 1);
        } else {
                drm_sched_entity_destroy(&adev->mman.default_entity.base);
                drm_sched_entity_destroy(&adev->mman.clear_entity.base);
@@ -2503,6 +2528,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
                            struct dma_fence **fence)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+       struct amdgpu_ttm_buffer_entity *entity;
        struct amdgpu_res_cursor cursor;
        u64 addr;
        int r = 0;
@@ -2513,11 +2539,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
        if (!fence)
                return -EINVAL;
 
+       entity = &adev->mman.clear_entity;
        *fence = dma_fence_get_stub();
 
        amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
 
-       mutex_lock(&adev->mman.gtt_window_lock);
+       mutex_lock(&entity->lock);
        while (cursor.remaining) {
                struct dma_fence *next = NULL;
                u64 size;
@@ -2530,13 +2557,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
                /* Never clear more than 256MiB at once to avoid timeouts */
                size = min(cursor.size, 256ULL << 20);
 
-               r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity,
-                                         &bo->tbo, bo->tbo.resource, &cursor,
-                                         1, false, &size, &addr);
+               r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &cursor,
+                                         0, false, &size, &addr);
                if (r)
                        goto err;
 
-               r = amdgpu_ttm_fill_mem(adev, &adev->mman.clear_entity, 0, addr, size, resv,
+               r = amdgpu_ttm_fill_mem(adev, entity, 0, addr, size, resv,
                                        &next, true,
                                        AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
                if (r)
@@ -2548,7 +2574,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
                amdgpu_res_next(&cursor, size);
        }
 err:
-       mutex_unlock(&adev->mman.gtt_window_lock);
+       mutex_unlock(&entity->lock);
 
        return r;
 }
@@ -2573,7 +2599,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
 
        amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
 
-       mutex_lock(&adev->mman.gtt_window_lock);
+       mutex_lock(&entity->lock);
        while (dst.remaining) {
                struct dma_fence *next;
                uint64_t cur_size, to;
@@ -2582,7 +2608,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
                cur_size = min(dst.size, 256ULL << 20);
 
                r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &dst,
-                                         1, false, &cur_size, &to);
+                                         0, false, &cur_size, &to);
                if (r)
                        goto error;
 
@@ -2598,7 +2624,7 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
                amdgpu_res_next(&dst, cur_size);
        }
 error:
-       mutex_unlock(&adev->mman.gtt_window_lock);
+       mutex_unlock(&entity->lock);
        if (f)
                *f = dma_fence_get(fence);
        dma_fence_put(fence);
index 143201ecea3fe948605c7cbff382dbe4253a9e2d..871388b86503ed7194b0736a8fbe616c77711cd7 100644 (file)
@@ -29,6 +29,7 @@
 #include <drm/ttm/ttm_placement.h>
 #include "amdgpu_vram_mgr.h"
 #include "amdgpu_hmm.h"
+#include "amdgpu_gmc.h"
 
 #define AMDGPU_PL_GDS          (TTM_PL_PRIV + 0)
 #define AMDGPU_PL_GWS          (TTM_PL_PRIV + 1)
@@ -39,7 +40,7 @@
 #define __AMDGPU_PL_NUM        (TTM_PL_PRIV + 6)
 
 #define AMDGPU_GTT_MAX_TRANSFER_SIZE   512
-#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS        2
+#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS        3
 
 extern const struct attribute_group amdgpu_vram_mgr_attr_group;
 extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
@@ -54,6 +55,8 @@ struct amdgpu_gtt_mgr {
 
 struct amdgpu_ttm_buffer_entity {
        struct drm_sched_entity base;
+       struct mutex            lock;
+       u64                     gart_window_offs[2];
 };
 
 struct amdgpu_mman {
@@ -67,8 +70,7 @@ struct amdgpu_mman {
        struct amdgpu_ring                      *buffer_funcs_ring;
        bool                                    buffer_funcs_enabled;
 
-       struct mutex                            gtt_window_lock;
-
+       /* @default_entity: for workarounds, has no gart windows */
        struct amdgpu_ttm_buffer_entity default_entity;
        struct amdgpu_ttm_buffer_entity clear_entity;
        struct amdgpu_ttm_buffer_entity move_entity;
@@ -205,6 +207,19 @@ static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
 }
 #endif
 
+/**
+ * amdgpu_compute_gart_address() - Returns GART address of an entity's window
+ * @gmc: The &struct amdgpu_gmc instance to use
+ * @entity: The &struct amdgpu_ttm_buffer_entity owning the GART window
+ * @index: The window to use (must be 0 or 1)
+ */
+static inline u64 amdgpu_compute_gart_address(struct amdgpu_gmc *gmc,
+                                             struct amdgpu_ttm_buffer_entity *entity,
+                                             int index)
+{
+       return gmc->gart_start + entity->gart_window_offs[index];
+}
+
 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range);
 int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
                              uint64_t *user_addr);
index b021f1e5611473e826f958f6fe6f7afb1761e150..10bc81ce37cbf04ebf9b63633248e190004f6c74 100644 (file)
@@ -59,8 +59,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring,
        void *cpu_addr;
        int r;
 
-       /* use gart window 0 */
-       *gart_addr = adev->gmc.gart_start;
+       *gart_addr = amdgpu_compute_gart_address(&adev->gmc, entity, 0);
 
        num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
        num_bytes = npages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
@@ -78,6 +77,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring,
        src_addr += job->ibs[0].gpu_addr;
 
        dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+       dst_addr += (entity->gart_window_offs[0] >> AMDGPU_GPU_PAGE_SHIFT) * 8;
        amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
                                dst_addr, num_bytes, 0);
 
@@ -116,7 +116,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring,
  * multiple GTT_MAX_PAGES transfer, all sdma operations are serialized, wait for
  * the last sdma finish fence which is returned to check copy memory is done.
  *
- * Context: Process context, takes and releases gtt_window_lock
+ * Context: Process context
  *
  * Return:
  * 0 - OK, otherwise error code
@@ -136,9 +136,9 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
        u64 size;
        int r;
 
-       entity = &adev->mman.default_entity;
+       entity = &adev->mman.move_entity;
 
-       mutex_lock(&adev->mman.gtt_window_lock);
+       mutex_lock(&entity->lock);
 
        while (npages) {
                size = min(GTT_MAX_PAGES, npages);
@@ -175,7 +175,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
        }
 
 out_unlock:
-       mutex_unlock(&adev->mman.gtt_window_lock);
+       mutex_unlock(&entity->lock);
 
        return r;
 }