]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/6.6.28/drm-amdkfd-reset-gpu-on-queue-preemption-failure.patch
Linux 6.1.87
[thirdparty/kernel/stable-queue.git] / releases / 6.6.28 / drm-amdkfd-reset-gpu-on-queue-preemption-failure.patch
1 From 8bdfb4ea95ca738d33ef71376c21eba20130f2eb Mon Sep 17 00:00:00 2001
2 From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
3 Date: Tue, 26 Mar 2024 15:32:46 -0400
4 Subject: drm/amdkfd: Reset GPU on queue preemption failure
5
6 From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
7
8 commit 8bdfb4ea95ca738d33ef71376c21eba20130f2eb upstream.
9
10 Currently, with F32 HWS, a GPU reset is triggered only when unmap queue fails.
11
12 However, if a compute queue doesn't respond to a preemption request in time,
13 unmap will return without any error. In this case, only the preemption error
14 is logged and a reset is not triggered. Call GPU reset in this case also.
15
16 Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
17 Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
18 Reviewed-by: Mukul Joshi <mukul.joshi@amd.com>
19 Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
20 Cc: stable@vger.kernel.org
21 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
22 ---
23 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 1 +
24 1 file changed, 1 insertion(+)
25
26 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
27 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
28 @@ -1980,6 +1980,7 @@ static int unmap_queues_cpsch(struct dev
29 pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
30 while (halt_if_hws_hang)
31 schedule();
32 + kfd_hws_hang(dqm);
33 return -ETIME;
34 }
35