git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/amdgpu: flush coredump work before HW teardown
author Jesse Zhang <Jesse.Zhang@amd.com>
Tue, 24 Mar 2026 09:31:43 +0000 (17:31 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 30 Mar 2026 18:32:12 +0000 (14:32 -0400)
In amdgpu_device_fini_hw(), deferred coredump formatting work may still
be pending when hardware and IP components are being torn down. Since
the work may access device registers and memory that will be freed or
powered off, it must be completed before proceeding.

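Add amdgpu_coredump_fini(), which calls flush_work() on
adev->coredump_work (guarded by CONFIG_DEV_COREDUMP, with an empty stub
otherwise), and call it from amdgpu_device_fini_hw() to ensure any
pending coredump work finishes before the device enters the early IP
fini stage.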

This avoids a potential use-after-free or access to hardware resources
that are no longer available.

Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Suggested-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Jesse Zhang <jesse.zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 28198f3a6e0d0c10aed0acf2ce9fa2873fa9a021..fddf4e1252bdf7eb0c2c0eedd22c9d2a48a8455b 100644 (file)
@@ -35,6 +35,9 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
 void amdgpu_coredump_init(struct amdgpu_device *adev)
 {
 }
+void amdgpu_coredump_fini(struct amdgpu_device *adev)
+{
+}
 #else
 
 #define AMDGPU_CORE_DUMP_SIZE_MAX (256 * 1024 * 1024)
@@ -440,4 +443,10 @@ void amdgpu_coredump_init(struct amdgpu_device *adev)
 {
        INIT_WORK(&adev->coredump_work, amdgpu_devcoredump_deferred_work);
 }
+
+void amdgpu_coredump_fini(struct amdgpu_device *adev)
+{
+       /* Finish deferred coredump formatting before HW/IP teardown. */
+       flush_work(&adev->coredump_work);
+}
 #endif
index b3582d0b4ca43bdf3516174640716e9bea57042f..f8f2f4df129b165ab6f137fc39ec134a29234666 100644 (file)
@@ -50,4 +50,5 @@ struct amdgpu_coredump_info {
 void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
                     bool vram_lost, struct amdgpu_job *job);
 void amdgpu_coredump_init(struct amdgpu_device *adev);
+void amdgpu_coredump_fini(struct amdgpu_device *adev);
 #endif
index a7038f039b10539fd7137e5fbcf7adea7d7c5596..9c936519bb2bf200c9e5f6ac7ef72ab59d8399b5 100644 (file)
@@ -4225,6 +4225,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
        if (pci_dev_is_disconnected(adev->pdev))
                amdgpu_amdkfd_device_fini_sw(adev);
 
+       amdgpu_coredump_fini(adev);
        amdgpu_device_ip_fini_early(adev);
 
        amdgpu_irq_fini_hw(adev);