git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.19-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 20 Mar 2026 13:47:02 +0000 (14:47 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 20 Mar 2026 13:47:02 +0000 (14:47 +0100)
added patches:
drm-amd-fix-hang-on-amdgpu-unload-by-using-pci_dev_is_disconnected.patch

queue-6.19/drm-amd-fix-hang-on-amdgpu-unload-by-using-pci_dev_is_disconnected.patch [new file with mode: 0644]
queue-6.19/series

diff --git a/queue-6.19/drm-amd-fix-hang-on-amdgpu-unload-by-using-pci_dev_is_disconnected.patch b/queue-6.19/drm-amd-fix-hang-on-amdgpu-unload-by-using-pci_dev_is_disconnected.patch
new file mode 100644 (file)
index 0000000..c0cbc0b
--- /dev/null
@@ -0,0 +1,62 @@
+From f7afda7fcd169a9168695247d07ad94cf7b9798f Mon Sep 17 00:00:00 2001
+From: Mario Limonciello <mario.limonciello@amd.com>
+Date: Thu, 5 Feb 2026 10:42:54 -0600
+Subject: drm/amd: Fix hang on amdgpu unload by using pci_dev_is_disconnected()
+
+From: Mario Limonciello <mario.limonciello@amd.com>
+
+commit f7afda7fcd169a9168695247d07ad94cf7b9798f upstream.
+
+The commit 6a23e7b4332c ("drm/amd: Clean up kfd node on surprise
+disconnect") introduced early KFD cleanup when drm_dev_is_unplugged()
+returns true. However, this causes hangs during normal module unload
+(rmmod amdgpu).
+
+The issue occurs because drm_dev_unplug() is called in amdgpu_pci_remove()
+for all removal scenarios, not just surprise disconnects. This was done
+intentionally in commit 39934d3ed572 ("Revert "drm/amdgpu: TA unload
+messages are not actually sent to psp when amdgpu is uninstalled"") to
+fix IGT PCI software unplug test failures. As a result,
+drm_dev_is_unplugged() returns true even during normal module unload,
+triggering the early KFD cleanup inappropriately.
+
+The correct check should distinguish between:
+- Actual surprise disconnect (eGPU unplugged): pci_dev_is_disconnected()
+  returns true
+- Normal module unload (rmmod): pci_dev_is_disconnected() returns false
+
+Replace drm_dev_is_unplugged() with pci_dev_is_disconnected() to ensure
+the early cleanup only happens during true hardware disconnect events.
+
+Cc: stable@vger.kernel.org
+Reported-by: Cal Peake <cp@absolutedigital.net>
+Closes: https://lore.kernel.org/all/b0c22deb-c0fa-3343-33cf-fd9a77d7db99@absolutedigital.net/
+Fixes: 6a23e7b4332c ("drm/amd: Clean up kfd node on surprise disconnect")
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -5081,7 +5081,7 @@ void amdgpu_device_fini_hw(struct amdgpu
+        * before ip_fini_early to prevent kfd locking refcount issues by calling
+        * amdgpu_amdkfd_suspend()
+        */
+-      if (drm_dev_is_unplugged(adev_to_drm(adev)))
++      if (pci_dev_is_disconnected(adev->pdev))
+               amdgpu_amdkfd_device_fini_sw(adev);
+       amdgpu_device_ip_fini_early(adev);
+@@ -5093,7 +5093,7 @@ void amdgpu_device_fini_hw(struct amdgpu
+       amdgpu_gart_dummy_page_fini(adev);
+-      if (drm_dev_is_unplugged(adev_to_drm(adev)))
++      if (pci_dev_is_disconnected(adev->pdev))
+               amdgpu_device_unmap_mmio(adev);
+ }
index b227b4cc69298fb55e4bb04665bde2f41e8083f7..12837b238e5889b794f8e453edd9ca83b603bb69 100644 (file)
@@ -20,3 +20,4 @@ mac80211-fix-crash-in-ieee80211_chan_bw_change-for-ap_vlan-stations.patch
 crypto-padlock-sha-disable-for-zhaoxin-processor.patch
 bluetooth-l2cap-fix-type-confusion-in-l2cap_ecred_reconf_rsp.patch
 bluetooth-l2cap-validate-l2cap_info_rsp-payload-length-before-access.patch
+drm-amd-fix-hang-on-amdgpu-unload-by-using-pci_dev_is_disconnected.patch