git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdkfd: Move gfx9.4.3 and gfx 9.5 MQD to HBM
author: Philip Yang <Philip.Yang@amd.com>
Thu, 20 Nov 2025 21:43:04 +0000 (16:43 -0500)
committer: Alex Deucher <alexander.deucher@amd.com>
Tue, 20 Jan 2026 22:15:46 +0000 (17:15 -0500)
To reduce queue switch latency further, move the MQD to the VRAM domain
so that the CP accesses the MQD and control stack via the FB aperture;
this requires contiguous pages.

After the MQD is initialized, updated or restored, flush the HDP to
guarantee the data is written to HBM and the GPU cache is invalidated,
so that the CP will read the new MQD.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c

index 15770e9a7e632d37336b6e004ddef392bd2baaf9..877d0df50376a792b9ef12be598b82abef4a1481 100644 (file)
@@ -334,7 +334,8 @@ int amdgpu_amdkfd_alloc_kernel_mem(struct amdgpu_device *adev, size_t size,
        bp.size = size;
        bp.byte_align = PAGE_SIZE;
        bp.domain = domain;
-       bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+       bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+                  AMDGPU_GEM_CREATE_CPU_GTT_USWC;
        bp.type = ttm_bo_type_kernel;
        bp.resv = NULL;
        bp.bo_ptr_size = sizeof(struct amdgpu_bo);
index d234db1381828eabdd84968c65052871fde3d9f6..d867dccae6758209c76c472066edf0f6e931c739 100644 (file)
@@ -109,6 +109,17 @@ static void set_priority(struct v9_mqd *m, struct queue_properties *q)
        m->cp_hqd_queue_priority = q->priority;
 }
 
+static bool mqd_on_vram(struct amdgpu_device *adev)
+{
+       switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+       case IP_VERSION(9, 4, 3):
+       case IP_VERSION(9, 5, 0):
+               return true;
+       default:
+               return false;
+       }
+}
+
 static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
                struct queue_properties *q)
 {
@@ -139,7 +150,8 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
                        (ALIGN(q->ctl_stack_size, PAGE_SIZE) +
                        ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) *
                        NUM_XCC(node->xcc_mask),
-                       AMDGPU_GEM_DOMAIN_GTT,
+                       mqd_on_vram(node->adev) ? AMDGPU_GEM_DOMAIN_VRAM :
+                                                 AMDGPU_GEM_DOMAIN_GTT,
                        &(mqd_mem_obj->mem),
                        &(mqd_mem_obj->gpu_addr),
                        (void *)&(mqd_mem_obj->cpu_ptr), true);
@@ -739,6 +751,9 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
                        *gart_addr = xcc_gart_addr;
                }
        }
+
+       if (mqd_on_vram(mm->dev->adev))
+               amdgpu_device_flush_hdp(mm->dev->adev, NULL);
 }
 
 static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
@@ -775,6 +790,9 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
                        m->pm4_target_xcc_in_xcp = q->pm4_target_xcc;
                }
        }
+
+       if (mqd_on_vram(mm->dev->adev))
+               amdgpu_device_flush_hdp(mm->dev->adev, NULL);
 }
 
 static void restore_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
@@ -813,6 +831,9 @@ static void restore_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
                                        (uint8_t *)ctl_stack_src + xcc *  mqd_ctl_stack_size,
                                        mqd_ctl_stack_size);
        }
+
+       if (mqd_on_vram(mm->dev->adev))
+               amdgpu_device_flush_hdp(mm->dev->adev, NULL);
 }
 static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
                   enum kfd_preempt_type type, unsigned int timeout,