drm/amd: Fix MQD and control stack alignment for non-4K CPU page sizes

author Donet Tom <donettom@linux.ibm.com>

Mon, 23 Mar 2026 04:28:38 +0000 (09:58 +0530)

committer Alex Deucher <alexander.deucher@amd.com>

Mon, 30 Mar 2026 20:12:27 +0000 (16:12 -0400)
author Donet Tom <donettom@linux.ibm.com>
Mon, 23 Mar 2026 04:28:38 +0000 (09:58 +0530)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 30 Mar 2026 20:12:27 +0000 (16:12 -0400)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c

index e2d32c29668af455c959f93c2d44b6412477b770..bc772ca3dab726f136590b98cbcfdf0d75d7ff0a 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -403,6 +403,50 @@ void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,
         drm_dev_exit(idx);
  }
  
+/**
+ * amdgpu_gart_map_gfx9_mqd - map mqd and ctrl_stack dma_addresses into GART entries
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: byte offset into the GPU's gart aperture of the first entry
+ * @pages: number of CPU pages to bind (each spans AMDGPU_GPU_PAGES_IN_CPU_PAGE)
+ * @dma_addr: DMA addresses of pages, one element per CPU page
+ * @flags: page table entry flags, applied unmodified to the MQD page only
+ *
+ * Map a gfxv9 MQD followed by its control stack. Only the first GPU page (the
+ * MQD, presumably UC via the caller's @flags) is written with @flags as passed;
+ * every later GPU page, even one sharing the first CPU page, gets @flags with
+ * the memory type set to NC. No-op if gart.ptr is NULL or drm_dev_enter() fails.
+ */
+void amdgpu_gart_map_gfx9_mqd(struct amdgpu_device *adev, uint64_t offset,
+                       int pages, dma_addr_t *dma_addr, uint64_t flags)
+{
+       uint64_t page_base;
+       unsigned int i, j, t;
+       int idx;
+       uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
+       void *dst;
+
+       if (!adev->gart.ptr)
+               return;
+
+       if (!drm_dev_enter(adev_to_drm(adev), &idx))
+               return;
+
+       t = offset / AMDGPU_GPU_PAGE_SIZE; /* index of the first GART entry */
+       dst = adev->gart.ptr;
+       for (i = 0; i < pages; i++) {
+               page_base = dma_addr[i];
+               for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
+                       if ((i == 0) && (j == 0)) /* MQD: very first GPU page */
+                               amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags);
+                       else
+                               amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, ctrl_flags);
+                       page_base += AMDGPU_GPU_PAGE_SIZE;
+               }
+       }
+       drm_dev_exit(idx);
+}
+
  /**
   * amdgpu_gart_bind - bind pages into the gart page table
   *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h

index d3118275ddae8f3ae23ddd7e60a11ffdc545df6b..6ebd2da32ea60b0c80efc9e85ff70bce267376a6 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -62,6 +62,8 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
  void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
                      int pages, dma_addr_t *dma_addr, uint64_t flags,
                      void *dst);
+void amdgpu_gart_map_gfx9_mqd(struct amdgpu_device *adev, uint64_t offset,
+                       int pages, dma_addr_t *dma_addr, uint64_t flags);
  void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
                       int pages, dma_addr_t *dma_addr, uint64_t flags);
  void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index eeaa56c8d129faafd8113b7f741471d8c9c32bdb..0ccb31788b20bc21282fa167b445b2f0cd238394 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -853,25 +853,15 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
         int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
         uint64_t page_idx, pages_per_xcc;
         int i;
-       uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
  
         pages_per_xcc = total_pages;
         do_div(pages_per_xcc, num_xcc);
  
         for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
-               /* MQD page: use default flags */
-               amdgpu_gart_bind(adev,
+               amdgpu_gart_map_gfx9_mqd(adev,
                                 gtt->offset + (page_idx << PAGE_SHIFT),
-                               1, &gtt->ttm.dma_address[page_idx], flags);
-               /*
-                * Ctrl pages - modify the memory type to NC (ctrl_flags) from
-                * the second page of the BO onward.
-                */
-               amdgpu_gart_bind(adev,
-                               gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
-                               pages_per_xcc - 1,
-                               &gtt->ttm.dma_address[page_idx + 1],
-                               ctrl_flags);
+                               pages_per_xcc, &gtt->ttm.dma_address[page_idx],
+                               flags);
         }
  }
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c

index d5c234f30e8d64a94e43637990064bd45bc2b1bc..a535f151cb5fddd565477f6d3a01a9223fbbda87 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -42,9 +42,16 @@ static uint64_t mqd_stride_v9(struct mqd_manager *mm,
                                 struct queue_properties *q)
  {
         if (mm->dev->kfd->cwsr_enabled &&
-           q->type == KFD_QUEUE_TYPE_COMPUTE)
-               return ALIGN(q->ctl_stack_size, PAGE_SIZE) +
-                       ALIGN(sizeof(struct v9_mqd), PAGE_SIZE);
+           q->type == KFD_QUEUE_TYPE_COMPUTE) {
+
+               /* On gfxv9, the MQD resides in the first 4K page,
+                * followed by the control stack. Align each to
+                * AMDGPU_GPU_PAGE_SIZE, then round the sum up to PAGE_SIZE.
+                */
+
+               return ALIGN(ALIGN(q->ctl_stack_size, AMDGPU_GPU_PAGE_SIZE) +
+                       ALIGN(sizeof(struct v9_mqd), AMDGPU_GPU_PAGE_SIZE), PAGE_SIZE);
+       }
  
         return mm->mqd_size;
  }
@@ -151,8 +158,8 @@ static struct kfd_mem_obj *allocate_mqd(struct mqd_manager *mm,
                 if (!mqd_mem_obj)
                         return NULL;
                 retval = amdgpu_amdkfd_alloc_kernel_mem(node->adev,
-                       (ALIGN(q->ctl_stack_size, PAGE_SIZE) +
-                       ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) *
+                       (ALIGN(ALIGN(q->ctl_stack_size, AMDGPU_GPU_PAGE_SIZE) +
+                       ALIGN(sizeof(struct v9_mqd), AMDGPU_GPU_PAGE_SIZE), PAGE_SIZE)) *
                         NUM_XCC(node->xcc_mask),
                         mqd_on_vram(node->adev) ? AMDGPU_GEM_DOMAIN_VRAM :
                                                   AMDGPU_GEM_DOMAIN_GTT,
@@ -360,7 +367,7 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
         struct kfd_context_save_area_header header;
  
         /* Control stack is located one page after MQD. */
-       void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
+       void *mqd_ctl_stack = (void *)((uintptr_t)mqd + AMDGPU_GPU_PAGE_SIZE);
  
         m = get_mqd(mqd);
  
@@ -397,7 +404,7 @@ static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, voi
  {
         struct v9_mqd *m;
         /* Control stack is located one page after MQD. */
-       void *ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
+       void *ctl_stack = (void *)((uintptr_t)mqd + AMDGPU_GPU_PAGE_SIZE);
  
         m = get_mqd(mqd);
  
@@ -443,7 +450,7 @@ static void restore_mqd(struct mqd_manager *mm, void **mqd,
                 *gart_addr = addr;
  
         /* Control stack is located one page after MQD. */
-       ctl_stack = (void *)((uintptr_t)*mqd + PAGE_SIZE);
+       ctl_stack = (void *)((uintptr_t)*mqd + AMDGPU_GPU_PAGE_SIZE);
         memcpy(ctl_stack, ctl_stack_src, ctl_stack_size);
  
         m->cp_hqd_pq_doorbell_control =
author	Donet Tom <donettom@linux.ibm.com>
	Mon, 23 Mar 2026 04:28:38 +0000 (09:58 +0530)
committer	Alex Deucher <alexander.deucher@amd.com>
	Mon, 30 Mar 2026 20:12:27 +0000 (16:12 -0400)
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c		patch \| blob \| blame \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h		patch \| blob \| blame \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c		patch \| blob \| blame \| history
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c		patch \| blob \| blame \| history