git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
drm/amdkfd: relax checks for over allocation of save area
author: Jonathan Kim <jonathan.kim@amd.com>
Thu, 6 Nov 2025 15:17:06 +0000 (10:17 -0500)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 24 Nov 2025 09:36:03 +0000 (10:36 +0100)
commit d15deafab5d722afb9e2f83c5edcdef9d9d98bd1 upstream.

Over-allocation of the save area is not fatal; only under-allocation is.
ROCm has various components that independently claim authority over save
area size.

Unless KFD decides to claim single authority, relax size checks.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Philip Yang <philip.yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 15bd4958fe38e763bc17b607ba55155254a01f55)
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/gpu/drm/amd/amdkfd/kfd_queue.c

index 29d7cb4cfe69ae5b7d02b31145bb72df76440139..94937b824e9882bdfaf33ed571f07c93109c47d1 100644 (file)
@@ -297,16 +297,16 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope
                goto out_err_unreserve;
        }
 
-       if (properties->ctx_save_restore_area_size != topo_dev->node_props.cwsr_size) {
-               pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n",
+       if (properties->ctx_save_restore_area_size < topo_dev->node_props.cwsr_size) {
+               pr_debug("queue cwsr size 0x%x not sufficient for node cwsr size 0x%x\n",
                        properties->ctx_save_restore_area_size,
                        topo_dev->node_props.cwsr_size);
                err = -EINVAL;
                goto out_err_unreserve;
        }
 
-       total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size)
-                         * NUM_XCC(pdd->dev->xcc_mask);
+       total_cwsr_size = (properties->ctx_save_restore_area_size +
+                          topo_dev->node_props.debug_memory_size) * NUM_XCC(pdd->dev->xcc_mask);
        total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE);
 
        err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address,
@@ -352,8 +352,8 @@ int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_prope
        topo_dev = kfd_topology_device_by_id(pdd->dev->id);
        if (!topo_dev)
                return -EINVAL;
-       total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size)
-                         * NUM_XCC(pdd->dev->xcc_mask);
+       total_cwsr_size = (properties->ctx_save_restore_area_size +
+                          topo_dev->node_props.debug_memory_size) * NUM_XCC(pdd->dev->xcc_mask);
        total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE);
 
        kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address, total_cwsr_size);