git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdkfd: fix the hang caused by the write reorder to fence_addr
author: Victor Zhao <Victor.Zhao@amd.com>
Thu, 17 Oct 2024 08:20:40 +0000 (16:20 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Tue, 22 Oct 2024 21:50:39 +0000 (17:50 -0400)
Make sure the KFD_FENCE_INIT write to fence_addr happens before
pm_send_query_status is called, to avoid a qcm fence timeout caused by
incorrect ordering.

Signed-off-by: Victor Zhao <Victor.Zhao@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h

index b2b16a812e73b8d50f048a02bea1f4ba7637b04f..5a318376203c92b95e9773d5b7651887d2e9a14e 100644 (file)
@@ -2048,7 +2048,7 @@ int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm,
 {
        unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
        struct device *dev = dqm->dev->adev->dev;
-       uint64_t *fence_addr =  dqm->fence_addr;
+       volatile uint64_t *fence_addr = dqm->fence_addr;
 
        while (*fence_addr != fence_value) {
                /* Fatal err detected, this response won't come */
@@ -2254,6 +2254,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
                goto out;
 
        *dqm->fence_addr = KFD_FENCE_INIT;
+       mb();
        pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
                                KFD_FENCE_COMPLETED);
        /* should be timed out */
index 09ab36f8e8c69e9f0103e7f1b80e1edb5e7a50c6..bddb169bb301e1ccf21133698bf3088d157bb05d 100644 (file)
@@ -260,7 +260,7 @@ struct device_queue_manager {
        uint16_t                vmid_pasid[VMID_NUM];
        uint64_t                pipelines_addr;
        uint64_t                fence_gpu_addr;
-       uint64_t                *fence_addr;
+       volatile uint64_t       *fence_addr;
        struct kfd_mem_obj      *fence_mem;
        bool                    active_runlist;
        int                     sched_policy;