From: Victor Zhao Date: Thu, 17 Oct 2024 08:20:40 +0000 (+0800) Subject: drm/amdkfd: fix the hang caused by the write reorder to fence_addr X-Git-Tag: v6.13-rc1~122^2~15^2~26 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8834456163a1b372a85891751e51cafbf443a2d8;p=thirdparty%2Fkernel%2Flinux.git drm/amdkfd: fix the hang caused by the write reorder to fence_addr make sure KFD_FENCE_INIT write to fence_addr before pm_send_query_status called, to avoid qcm fence timeout caused by incorrect ordering. Signed-off-by: Victor Zhao Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index b2b16a812e73b..5a318376203c9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2048,7 +2048,7 @@ int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm, { unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; struct device *dev = dqm->dev->adev->dev; - uint64_t *fence_addr = dqm->fence_addr; + volatile uint64_t *fence_addr = dqm->fence_addr; while (*fence_addr != fence_value) { /* Fatal err detected, this response won't come */ @@ -2254,6 +2254,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, goto out; *dqm->fence_addr = KFD_FENCE_INIT; + mb(); pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr, KFD_FENCE_COMPLETED); /* should be timed out */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 09ab36f8e8c69..bddb169bb301e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -260,7 +260,7 @@ struct device_queue_manager { uint16_t vmid_pasid[VMID_NUM]; uint64_t pipelines_addr; uint64_t fence_gpu_addr; - uint64_t *fence_addr; + volatile uint64_t *fence_addr; struct kfd_mem_obj *fence_mem; bool active_runlist; int sched_policy;