git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
drm/amdkfd: Block per-queue reset when halt_if_hws_hang=1
author: Jay Cornwall <jay.cornwall@amd.com>
Thu, 16 Jan 2025 20:36:39 +0000 (14:36 -0600)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 17 Feb 2025 09:05:07 +0000 (10:05 +0100)
commit f214b7beb00621b983e67ce97477afc3ab4b38f4 upstream.

The purpose of halt_if_hws_hang is to preserve GPU state for driver
debugging when queue preemption fails. Issuing per-queue reset may
kill wavefronts which caused the preemption failure.

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Reviewed-by: Jonathan Kim <Jonathan.Kim@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org # 6.12.x
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

index f5b3ed20e891b374416957ed98173c1071f121af..3cfb4a38d17c7f497b26920b86792f0e77363cf2 100644 (file)
@@ -2290,9 +2290,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
         */
        mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
        if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) {
+               while (halt_if_hws_hang)
+                       schedule();
                if (reset_queues_on_hws_hang(dqm)) {
-                       while (halt_if_hws_hang)
-                               schedule();
                        dqm->is_hws_hang = true;
                        kfd_hws_hang(dqm);
                        retval = -ETIME;