git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
drm/amdgpu: Fix NULL pointer dereference in VRAM logic for APU devices
author: Jesse.Zhang <Jesse.Zhang@amd.com>
Mon, 13 Oct 2025 05:46:12 +0000 (13:46 +0800)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 24 Nov 2025 09:35:47 +0000 (10:35 +0100)
[ Upstream commit 883f309add55060233bf11c1ea6947140372920f ]

Previously, APU platforms (and other scenarios with uninitialized VRAM managers)
triggered a NULL pointer dereference in `ttm_resource_manager_usage()`. The root
cause is not that the `struct ttm_resource_manager *man` pointer itself is NULL,
but that `man->bdev` (the backing device pointer within the manager) remains
uninitialized (NULL) on APUs—since APUs lack dedicated VRAM and do not fully
set up VRAM manager structures. When `ttm_resource_manager_usage()` attempts to
acquire `man->bdev->lru_lock`, it dereferences the NULL `man->bdev`, leading to
a kernel OOPS.

The fix guards each affected call site with `ttm_resource_manager_used()`:

1. **amdgpu_cs.c**: Extend the existing bandwidth control check in
   `amdgpu_cs_get_threshold_for_moves()` to include a check for
   `ttm_resource_manager_used()`. If the manager is not in use (as on APUs,
   where `man->bdev` is left uninitialized), return 0 for both migration
   thresholds immediately, skipping the VRAM-specific logic that would trigger
   the NULL dereference.

2. **amdgpu_kms.c**: Update the `AMDGPU_INFO_VRAM_USAGE` ioctl and memory info
   reporting to use a conditional: if the manager is used, return the real VRAM
   usage; otherwise, return 0. This avoids accessing `man->bdev` when it is
   NULL.

3. **amdgpu_virt.c**: Modify the vf2pf (virtual function to physical function)
   data write path. Use `ttm_resource_manager_used()` to check validity: if the
   manager is usable, calculate `fb_usage` from VRAM usage; otherwise, set
   `fb_usage` to 0 (APUs have no discrete framebuffer to report).

This approach is more robust than APU-specific checks because it:
- Works for all scenarios where the VRAM manager is uninitialized (not just APUs),
- Aligns with TTM's design by using its native helper function,
- Preserves correct behavior for discrete GPUs (which have fully initialized
  `man->bdev` and pass the `ttm_resource_manager_used()` check).

v4: use ttm_resource_manager_used(&adev->mman.vram_mgr.manager) instead of checking the adev->gmc.is_app_apu flag (Christian)

Reviewed-by: Christian König <christian.koenig@amd.com>
Suggested-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

index 082fc12fe28dc570232d94ae727e46fa1d553600..844e49d1499ed3e2734e3bb8aec91e31bebdfb00 100644 (file)
@@ -691,7 +691,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
         */
        const s64 us_upper_bound = 200000;
 
-       if (!adev->mm_stats.log2_max_MBps) {
+       if ((!adev->mm_stats.log2_max_MBps) || !ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) {
                *max_bytes = 0;
                *max_vis_bytes = 0;
                return;
index 016a6f6c4267b44b8a02f2b7424a50e792f46e7c..1291ca57a1cb331b2cbdeb6eef8fd04799df29b3 100644 (file)
@@ -707,7 +707,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
                return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
        case AMDGPU_INFO_VRAM_USAGE:
-               ui64 = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
+               ui64 = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+                       ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) : 0;
                return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
        case AMDGPU_INFO_VIS_VRAM_USAGE:
                ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
@@ -753,8 +754,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                mem.vram.usable_heap_size = adev->gmc.real_vram_size -
                        atomic64_read(&adev->vram_pin_size) -
                        AMDGPU_VM_RESERVED_VRAM;
-               mem.vram.heap_usage =
-                       ttm_resource_manager_usage(vram_man);
+               mem.vram.heap_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+                               ttm_resource_manager_usage(vram_man) : 0;
                mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
 
                mem.cpu_accessible_vram.total_heap_size =
index 01dccd489a805021d7628f0346ac78c7ad8393eb..9247cd7b1868c718d79c9a396f5de690465ff008 100644 (file)
@@ -595,8 +595,8 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
        vf2pf_info->driver_cert = 0;
        vf2pf_info->os_info.all = 0;
 
-       vf2pf_info->fb_usage =
-               ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20;
+       vf2pf_info->fb_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+                ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20 : 0;
        vf2pf_info->fb_vis_usage =
                amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20;
        vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20;