From: David Francis
Date: Fri, 16 Jan 2026 15:21:15 +0000 (-0500)
Subject: drm/amdgpu: Check for multiplication overflow in checkpoint stack size
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=421c0f19043337a553e802b2dfe4b56d538ef4d6;p=thirdparty%2Flinux.git

drm/amdgpu: Check for multiplication overflow in checkpoint stack size

get_checkpoint_info() in kfd_mqd_manager_v9.c finds 32-bit value
ctl_stack_size by multiplying two 32-bit values. This can overflow to a
lower value, which could result in copying outside the bounds of
a buffer in checkpoint_mqd() in the same file.

Put in a check for the overflow, and fail with -EINVAL if
detected.

v2: use check_mul_overflow()

Signed-off-by: David Francis
Reviewed-by: Alex Deucher
Signed-off-by: Alex Deucher
---

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 3ddf06c755b52..ab3b2e7be9bd0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2720,7 +2720,7 @@ static int get_wave_state(struct device_queue_manager *dqm,
 			ctl_stack, ctl_stack_used_size, save_area_used_size);
 }
 
-static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
+static int get_queue_checkpoint_info(struct device_queue_manager *dqm,
 				  const struct queue *q,
 				  u32 *mqd_size,
 				  u32 *ctl_stack_size)
@@ -2728,6 +2728,7 @@ static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
 	struct mqd_manager *mqd_mgr;
 	enum KFD_MQD_TYPE mqd_type =
 			get_mqd_type_from_queue_type(q->properties.type);
+	int ret = 0;
 
 	dqm_lock(dqm);
 	mqd_mgr = dqm->mqd_mgrs[mqd_type];
@@ -2735,9 +2736,11 @@ static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
 	*ctl_stack_size = 0;
 
 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
-		mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);
+		ret = mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);
 
 	dqm_unlock(dqm);
+
+	return ret;
 }
 
 static int checkpoint_mqd(struct device_queue_manager *dqm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index ef07e44916f80..3272328da11f9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -192,7 +192,7 @@ struct device_queue_manager_ops {
 
 	int (*reset_queues)(struct device_queue_manager *dqm,
 					uint16_t pasid);
-	void	(*get_queue_checkpoint_info)(struct device_queue_manager *dqm,
+	int	(*get_queue_checkpoint_info)(struct device_queue_manager *dqm,
 				  const struct queue *q, u32 *mqd_size,
 				  u32 *ctl_stack_size);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 2429d278ef0eb..06ca6235ff1b7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -102,7 +102,8 @@ struct mqd_manager {
 				  u32 *ctl_stack_used_size,
 				  u32 *save_area_used_size);
 
-	void	(*get_checkpoint_info)(struct mqd_manager *mm, void *mqd, uint32_t *ctl_stack_size);
+	int	(*get_checkpoint_info)(struct mqd_manager *mm, void *mqd,
+				       uint32_t *ctl_stack_size);
 
 	void	(*checkpoint_mqd)(struct mqd_manager *mm,
 				  void *mqd,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 19f21932a5ce7..979ae94ac9668 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -385,11 +385,14 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
 	return 0;
 }
 
-static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
+static int get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
 {
 	struct v9_mqd *m = get_mqd(mqd);
 
-	*ctl_stack_size = m->cp_hqd_cntl_stack_size * NUM_XCC(mm->dev->xcc_mask);
+	if (check_mul_overflow(m->cp_hqd_cntl_stack_size, NUM_XCC(mm->dev->xcc_mask), ctl_stack_size))
+		return -EINVAL;
+
+	return 0;
 }
 
 static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index f02ef2d44a07f..431a20323146b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -274,10 +274,11 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
 	return 0;
 }
 
-static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
+static int get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
 {
 	/* Control stack is stored in user mode */
 	*ctl_stack_size = 0;
+	return 0;
 }
 
 static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 8ea31699d38ba..586d409ebe4e9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -1069,6 +1069,7 @@ int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
 				  uint32_t *ctl_stack_size)
 {
 	struct process_queue_node *pqn;
+	int ret;
 
 	pqn = get_queue_by_qid(pqm, qid);
 	if (!pqn) {
@@ -1081,9 +1082,14 @@ int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
 		return -EOPNOTSUPP;
 	}
 
-	pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
+	ret = pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
 						       pqn->q,
 						       mqd_size,
 						       ctl_stack_size);
+	if (ret) {
+		pr_debug("amdkfd: Overflow while computing stack size for queue %d\n", qid);
+		return ret;
+	}
+
 	return 0;
 }