git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu: save ring content before resetting the device
author: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Wed, 4 Feb 2026 12:11:47 +0000 (13:11 +0100)
committer: Alex Deucher <alexander.deucher@amd.com>
Fri, 3 Apr 2026 18:04:29 +0000 (14:04 -0400)
Otherwise the content might not be relevant.

When a coredump is generated, the rings with outstanding fences
are saved and later printed to the final devcoredump from the
worker thread.
Since saving the rings requires a memory allocation that may fail,
the ring capture might be missing from the generated devcoredump.

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h

index fddf4e1252bdf7eb0c2c0eedd22c9d2a48a8455b..f54231005f5184b18f18aa6e9285983e95b10e53 100644 (file)
@@ -214,7 +214,9 @@ amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_inf
        struct drm_print_iterator iter;
        struct amdgpu_vm_fault_info *fault_info;
        struct amdgpu_ip_block *ip_block;
-       int ver;
+       struct amdgpu_ring *ring;
+       int ver, i, j;
+       u32 ring_idx, off;
 
        iter.data = buffer;
        iter.offset = 0;
@@ -303,23 +305,25 @@ amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_inf
 
        /* Add ring buffer information */
        drm_printf(&p, "Ring buffer information\n");
-       for (int i = 0; i < coredump->adev->num_rings; i++) {
-               int j = 0;
-               struct amdgpu_ring *ring = coredump->adev->rings[i];
-
-               drm_printf(&p, "ring name: %s\n", ring->name);
-               drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n",
-                          amdgpu_ring_get_rptr(ring),
-                          amdgpu_ring_get_wptr(ring),
-                          ring->buf_mask);
-               drm_printf(&p, "Ring size in dwords: %d\n",
-                          ring->ring_size / 4);
-               drm_printf(&p, "Ring contents\n");
-               drm_printf(&p, "Offset \t Value\n");
-
-               while (j < ring->ring_size) {
-                       drm_printf(&p, "0x%x \t 0x%x\n", j, ring->ring[j / 4]);
-                       j += 4;
+       if (coredump->num_rings) {
+               for (i = 0; i < coredump->num_rings; i++) {
+                       ring_idx = coredump->rings[i].ring_index;
+                       ring = coredump->adev->rings[ring_idx];
+                       off = coredump->rings[i].offset;
+
+                       drm_printf(&p, "ring name: %s\n", ring->name);
+                       drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n",
+                                  coredump->rings[i].rptr,
+                                  coredump->rings[i].wptr,
+                                  ring->buf_mask);
+                       drm_printf(&p, "Ring size in dwords: %d\n",
+                               ring->ring_size / 4);
+                       drm_printf(&p, "Ring contents\n");
+                       drm_printf(&p, "Offset \t Value\n");
+
+                       for (j = 0; j < ring->ring_size; j += 4)
+                               drm_printf(&p, "0x%x \t 0x%x\n", j,
+                                          coredump->rings_dw[off + j / 4]);
                }
        }
 
@@ -359,6 +363,8 @@ static void amdgpu_devcoredump_free(void *data)
        struct amdgpu_coredump_info *coredump = data;
 
        kvfree(coredump->formatted);
+       kvfree(coredump->rings);
+       kvfree(coredump->rings_dw);
        kvfree(data);
 }
 
@@ -396,6 +402,9 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
        struct drm_device *dev = adev_to_drm(adev);
        struct amdgpu_coredump_info *coredump;
        struct drm_sched_job *s_job;
+       u64 total_ring_size, ring_count;
+       struct amdgpu_ring *ring;
+       int i, off, idx;
 
        /* No need to generate a new coredump if there's one in progress already. */
        if (work_pending(&adev->coredump_work))
@@ -423,6 +432,47 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
                coredump->ring = to_amdgpu_ring(s_job->sched);
        }
 
+       /* Dump ring content if memory allocation succeeds. */
+       ring_count = 0;
+       total_ring_size = 0;
+       for (i = 0; i < adev->num_rings; i++) {
+               ring = adev->rings[i];
+
+               /* Only dump rings with unsignalled fences. */
+               if (atomic_read(&ring->fence_drv.last_seq) == ring->fence_drv.sync_seq &&
+                   coredump->ring != ring)
+                       continue;
+
+               total_ring_size += ring->ring_size;
+               ring_count++;
+       }
+       coredump->rings_dw = kzalloc(total_ring_size, GFP_NOWAIT);
+       coredump->rings = kcalloc(ring_count, sizeof(struct amdgpu_coredump_ring), GFP_NOWAIT);
+       if (coredump->rings && coredump->rings_dw) {
+               for (i = 0, off = 0, idx = 0; i < adev->num_rings; i++) {
+                       ring = adev->rings[i];
+
+                       if (atomic_read(&ring->fence_drv.last_seq) == ring->fence_drv.sync_seq &&
+                           coredump->ring != ring)
+                               continue;
+
+                       coredump->rings[idx].ring_index = ring->idx;
+                       coredump->rings[idx].rptr = amdgpu_ring_get_rptr(ring);
+                       coredump->rings[idx].wptr = amdgpu_ring_get_wptr(ring);
+                       coredump->rings[idx].offset = off;
+
+                       memcpy(&coredump->rings_dw[off], ring->ring, ring->ring_size);
+                       off += ring->ring_size;
+                       idx++;
+               }
+               coredump->num_rings = idx;
+       } else {
+               kvfree(coredump->rings_dw);
+               kvfree(coredump->rings);
+               coredump->rings_dw = NULL;
+               coredump->rings = NULL;
+       }
+
        coredump->adev = adev;
 
        ktime_get_ts64(&coredump->reset_time);
index f8f2f4df129b165ab6f137fc39ec134a29234666..d65e59050293c090453dd7b98ca221e7a1da8d3c 100644 (file)
 
 #define AMDGPU_COREDUMP_VERSION "1"
 
+struct amdgpu_coredump_ring { /* Per-ring state captured by amdgpu_coredump() and printed later by amdgpu_devcoredump_format(). */
+       u64                             rptr; /* amdgpu_ring_get_rptr() value read at capture time */
+       u64                             wptr; /* amdgpu_ring_get_wptr() value read at capture time */
+       u32                             ring_index; /* ring->idx of the captured ring; used to look up adev->rings[] when formatting */
+       u32                             offset; /* start of this ring's copy inside rings_dw; NOTE(review): accumulated in bytes at capture but used as a u32 array index - confirm units */
+};
+
 struct amdgpu_coredump_info {
        struct amdgpu_device            *adev;
        struct amdgpu_task_info         reset_task_info;
@@ -39,6 +46,11 @@ struct amdgpu_coredump_info {
        bool                            skip_vram_check;
        bool                            reset_vram_lost;
        struct amdgpu_ring              *ring;
+
+       struct amdgpu_coredump_ring     *rings;
+       u32                             *rings_dw;
+       u32                             num_rings;
+
        /* Readable form of coredevdump, generate once to speed up
         * reading it (see drm_coredump_printer's documentation).
         */