git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/amdgpu: fix lock warning in amdgpu_userq_fence_driver_process
author: Jesse.Zhang <Jesse.Zhang@amd.com>
Fri, 24 Oct 2025 08:09:25 +0000 (16:09 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 12 Nov 2025 03:50:22 +0000 (22:50 -0500)
Fix a potential deadlock caused by inconsistent spinlock usage
between interrupt and process contexts in the userq fence driver.

The issue occurs when amdgpu_userq_fence_driver_process() is called
from both:
- Interrupt context: gfx_v11_0_eop_irq() -> amdgpu_userq_fence_driver_process()
- Process context: amdgpu_eviction_fence_suspend_worker() ->
  amdgpu_userq_fence_driver_force_completion() -> amdgpu_userq_fence_driver_process()

In interrupt context, the lock is taken with plain spin_lock(), which
lockdep records as {IN-HARDIRQ-W} usage. In process context, the same
lock is also taken with plain spin_lock(), i.e. with interrupts still
enabled ({HARDIRQ-ON-W}). If the EOP interrupt fires on that CPU while
the process-context path holds the lock, the interrupt handler spins on
a lock its own CPU already owns and deadlocks. Lockdep detects this
combination and reports it as inconsistent lock usage.

Kernel log shows:
[ 4039.310790] inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage.
[ 4039.310804] kworker/7:2/409 [HC0[0]:SC0[0]:HE1:SE1] takes:
[ 4039.310818] ffff9284e1bed000 (&fence_drv->fence_list_lock){?...}-{3:3},
[ 4039.310993] {IN-HARDIRQ-W} state was registered at:
[ 4039.311004]   lock_acquire+0xc6/0x300
[ 4039.311018]   _raw_spin_lock+0x39/0x80
[ 4039.311031]   amdgpu_userq_fence_driver_process.part.0+0x30/0x180 [amdgpu]
[ 4039.311146]   amdgpu_userq_fence_driver_process+0x17/0x30 [amdgpu]
[ 4039.311257]   gfx_v11_0_eop_irq+0x132/0x170 [amdgpu]

Fix by using spin_lock_irqsave()/spin_unlock_irqrestore() to properly
manage interrupt state regardless of calling context.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit ded3ad780cf97a04927773c4600823b84f7f3cc2)
Cc: stable@vger.kernel.org
drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c

index 761bad98da3ec45b7519df72725daa02eb5346c3..4d0096d0baa9d0012ef84503d802dd663871e3f3 100644 (file)
@@ -151,15 +151,16 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
 {
        struct amdgpu_userq_fence *userq_fence, *tmp;
        struct dma_fence *fence;
+       unsigned long flags;
        u64 rptr;
        int i;
 
        if (!fence_drv)
                return;
 
+       spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
        rptr = amdgpu_userq_fence_read(fence_drv);
 
-       spin_lock(&fence_drv->fence_list_lock);
        list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
                fence = &userq_fence->base;
 
@@ -174,7 +175,7 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
                list_del(&userq_fence->link);
                dma_fence_put(fence);
        }
-       spin_unlock(&fence_drv->fence_list_lock);
+       spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
 }
 
 void amdgpu_userq_fence_driver_destroy(struct kref *ref)