git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/amdkfd: Fix buffer overflow in SDMA queue checkpoint/restore on GFX11
author Andrew Martin <andrew.martin@amd.com>
Thu, 28 May 2026 16:54:39 +0000 (12:54 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 3 Jun 2026 18:54:46 +0000 (14:54 -0400)
The v11 MQD manager incorrectly assigned the CP-compute variants of
checkpoint_mqd/restore_mqd for KFD_MQD_TYPE_SDMA queues. These functions
use sizeof(struct v11_compute_mqd) (2048 bytes) instead of sizeof(struct
v11_sdma_mqd) (512 bytes), causing a 1536-byte overflow.

During CRIU checkpoint of an SDMA queue on Navi3x:
- checkpoint_mqd() reads 2048 bytes from a 512-byte SDMA MQD buffer,
  leaking 1536 bytes of adjacent GTT memory to userspace

During CRIU restore:
- restore_mqd() writes 2048 bytes into a 512-byte SDMA MQD buffer,
  corrupting 1536 bytes of adjacent GTT memory (often the ring buffer
  or neighboring MQDs)

This is a copy-paste regression unique to v11. All other ASIC backends
(cik, vi, v9, v10, v12) correctly use the SDMA-specific variants.

Add checkpoint_mqd_sdma() and restore_mqd_sdma() functions that properly
handle the smaller v11_sdma_mqd structure, matching the pattern used in
other MQD managers.

Fixes: cc009e613de6 ("drm/amdkfd: Add KFD support for soc21 v3")
Assisted-by: Claude:Sonnet 4-5
Signed-off-by: Andrew Martin <andrew.martin@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 6fa41db7ffdec97d62433adf03b7b9b759af8c2c)
Cc: stable@vger.kernel.org
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c

index a1e3cf2384dd3b6574029e01fbf882f9acdb615f..527c531676e43cf070a0105b6b2cb136962fcec1 100644 (file)
@@ -320,8 +320,7 @@ static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, voi
 
 static void restore_mqd(struct mqd_manager *mm, void **mqd,
                        struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
-                       struct queue_properties *qp,
-                       const void *mqd_src,
+                       struct queue_properties *qp, const void *mqd_src,
                        const void *ctl_stack_src, const u32 ctl_stack_size)
 {
        uint64_t addr;
@@ -337,14 +336,48 @@ static void restore_mqd(struct mqd_manager *mm, void **mqd,
                *gart_addr = addr;
 
        m->cp_hqd_pq_doorbell_control =
-               qp->doorbell_off <<
-                       CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
-       pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
-                       m->cp_hqd_pq_doorbell_control);
+               qp->doorbell_off << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+       pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", m->cp_hqd_pq_doorbell_control);
 
        qp->is_active = 0;
 }
 
+static void checkpoint_mqd_sdma(struct mqd_manager *mm, /* SDMA checkpoint hook; mm is not used in this body */
+                               void *mqd, /* MQD to snapshot */
+                               void *mqd_dst, /* receives sizeof(struct v11_sdma_mqd) bytes */
+                               void *ctl_stack_dst) /* not used in this body */
+{
+       struct v11_sdma_mqd *m;
+
+       m = get_sdma_mqd(mqd); /* presumably a typed view of mqd; helper is not visible here -- confirm */
+
+       memcpy(mqd_dst, m, sizeof(struct v11_sdma_mqd)); /* copy length is the SDMA MQD struct size */
+}
+
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd, /* SDMA restore hook; *mqd is set to the restored MQD */
+                            struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, /* gart_addr may be NULL */
+                            struct queue_properties *qp, /* doorbell_off is read, is_active is cleared */
+                            const void *mqd_src, /* saved MQD image to copy in */
+                            const void *ctl_stack_src, /* not used in this body */
+                            const u32 ctl_stack_size) /* not used in this body */
+{
+       uint64_t addr;
+       struct v11_sdma_mqd *m;
+
+       m = (struct v11_sdma_mqd *) mqd_mem_obj->cpu_ptr; /* restore target is the buffer behind mqd_mem_obj */
+       addr = mqd_mem_obj->gpu_addr;
+
+       memcpy(m, mqd_src, sizeof(*m)); /* copies exactly one struct v11_sdma_mqd */
+
+       m->sdmax_rlcx_doorbell_offset = /* replace the copied doorbell offset with the one from qp */
+               qp->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+       *mqd = m;
+       if (gart_addr) /* optional output: only written when the caller passed a pointer */
+               *gart_addr = addr;
+
+       qp->is_active = 0; /* queue is left marked inactive after restore */
+}
 
 static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
                        struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
@@ -529,8 +562,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
                mqd->update_mqd = update_mqd_sdma;
                mqd->destroy_mqd = kfd_destroy_mqd_sdma;
                mqd->is_occupied = kfd_is_occupied_sdma;
-               mqd->checkpoint_mqd = checkpoint_mqd;
-               mqd->restore_mqd = restore_mqd;
+               mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+               mqd->restore_mqd = restore_mqd_sdma;
                mqd->mqd_size = sizeof(struct v11_sdma_mqd);
                mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)