git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/amdgpu: Show warning message if IH ring overflows
author Philip Yang <Philip.Yang@amd.com>
Tue, 3 Dec 2024 15:00:25 +0000 (10:00 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 18 Dec 2024 17:39:07 +0000 (12:39 -0500)
If the IH primary ring and the KFD ih fifo overflow, we may miss CP and
SDMA interrupts and cause an application soft hang. Show a warning message
with the ring name if an overflow happens.

Add a function to get the ih ring name to avoid duplicating it. To keep
the warning message consistent between GPU generations, change all
*_ih.c files except those for ASICs older than Vega, which have only one
ih ring.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
drivers/gpu/drm/amd/amdgpu/navi10_ih.c
drivers/gpu/drm/amd/amdgpu/vega10_ih.c
drivers/gpu/drm/amd/amdgpu/vega20_ih.c
drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c

index f3b0aaf3ebc69e7f90f8cf0c3f0e5d3417991669..901f8b12c672d14ddc10484d2fa6418af767b68a 100644 (file)
@@ -298,3 +298,9 @@ uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
        dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
        return dw1 | ((u64)(dw2 & 0xffff) << 32);
 }
+
+const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
+{
+       return ih == &adev->irq.ih ? "ih" : ih == &adev->irq.ih_soft ? "sw ih" :
+              ih == &adev->irq.ih1 ? "ih1" : ih == &adev->irq.ih2 ? "ih2" : "unknown";
+}
index 508f02eb0cf8f958d26853cd7b4698091554d94e..7d4395a5d8ac9f3194a50e296304b1bec862ad47 100644 (file)
@@ -110,4 +110,5 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
                                struct amdgpu_iv_entry *entry);
 uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
                                       signed int offset);
+const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
 #endif
index ebc2ab9c3c5c49a814e5ad558b78dd1620329920..62cdfe10e6f41b83b5c0f42d104395045d0f9006 100644 (file)
@@ -434,9 +434,8 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
         * this should allow us to catch up.
         */
        tmp = (wptr + 32) & ih->ptr_mask;
-       dev_warn(adev->dev, "IH ring buffer overflow "
-                "(0x%08X, 0x%08X, 0x%08X)\n",
-                wptr, ih->rptr, tmp);
+       dev_warn(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+                amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
        ih->rptr = tmp;
 
        tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
index 378da889e0754e9d3c06969519b7163c4c3c9f51..98fc6941159e1925e04e08a57a2c6189fd7f099e 100644 (file)
@@ -364,9 +364,8 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
         * this should allow us to catchup.
         */
        tmp = (wptr + 32) & ih->ptr_mask;
-       dev_warn(adev->dev, "IH ring buffer overflow "
-                "(0x%08X, 0x%08X, 0x%08X)\n",
-                wptr, ih->rptr, tmp);
+       dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+                            amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
        ih->rptr = tmp;
 
        tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
index 2c1c4b788b6d9990f6fc7270b3b7fafd2800612a..e9e3b2ed4b7bfe6c62a2c88e09f671852cbade2f 100644 (file)
@@ -444,9 +444,8 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
         * this should allow us to catchup.
         */
        tmp = (wptr + 32) & ih->ptr_mask;
-       dev_warn(adev->dev, "IH ring buffer overflow "
-                "(0x%08X, 0x%08X, 0x%08X)\n",
-                wptr, ih->rptr, tmp);
+       dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+                            amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
        ih->rptr = tmp;
 
        tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
index 6beb786c582a7d0622a13a1ee88a4b75701e4da4..783c2f5a04e4bd016143a3bafe867fe474f80b39 100644 (file)
@@ -108,8 +108,8 @@ void kfd_interrupt_exit(struct kfd_node *node)
 bool enqueue_ih_ring_entry(struct kfd_node *node, const void *ih_ring_entry)
 {
        if (kfifo_is_full(&node->ih_fifo)) {
-               dev_dbg_ratelimited(node->adev->dev,
-                                   "Interrupt ring overflow, dropping interrupt\n");
+               dev_warn_ratelimited(node->adev->dev, "KFD node %d ih_fifo overflow\n",
+                                    node->node_id);
                return false;
        }