]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/amdgpu: Fix NULL pointer dereference in VRAM logic for APU devices
authorJesse.Zhang <Jesse.Zhang@amd.com>
Mon, 13 Oct 2025 05:46:12 +0000 (13:46 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Mon, 13 Oct 2025 18:14:15 +0000 (14:14 -0400)
Previously, APU platforms (and other scenarios with uninitialized VRAM managers)
triggered a NULL pointer dereference in `ttm_resource_manager_usage()`. The root
cause is not that the `struct ttm_resource_manager *man` pointer itself is NULL,
but that `man->bdev` (the backing device pointer within the manager) remains
uninitialized (NULL) on APUs—since APUs lack dedicated VRAM and do not fully
set up VRAM manager structures. When `ttm_resource_manager_usage()` attempts to
acquire `man->bdev->lru_lock`, it dereferences the NULL `man->bdev`, leading to
a kernel OOPS.

1. **amdgpu_cs.c**: Extend the existing bandwidth control check in
   `amdgpu_cs_get_threshold_for_moves()` to include a check for
   `ttm_resource_manager_used()`. If the manager is not used (uninitialized
   `bdev`), return 0 for migration thresholds immediately—skipping VRAM-specific
   logic that would trigger the NULL dereference.

2. **amdgpu_kms.c**: Update the `AMDGPU_INFO_VRAM_USAGE` ioctl and memory info
   reporting to use a conditional: if the manager is used, return the real VRAM
   usage; otherwise, return 0. This avoids accessing `man->bdev` when it is
   NULL.

3. **amdgpu_virt.c**: Modify the vf2pf (virtual function to physical function)
   data write path. Use `ttm_resource_manager_used()` to check validity: if the
   manager is usable, calculate `fb_usage` from VRAM usage; otherwise, set
   `fb_usage` to 0 (APUs have no discrete framebuffer to report).

This approach is more robust than APU-specific checks because it:
- Works for all scenarios where the VRAM manager is uninitialized (not just APUs),
- Aligns with TTM's design by using its native helper function,
- Preserves correct behavior for discrete GPUs (which have fully initialized
  `man->bdev` and pass the `ttm_resource_manager_used()` check).

v4: use ttm_resource_manager_used(&adev->mman.vram_mgr.manager) instead of checking the adev->gmc.is_app_apu flag (Christian)

Reviewed-by: Christian König <christian.koenig@amd.com>
Suggested-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

index ba9fb08db0947194c3c8d392394dfa04113286c8..2f6a96af7fb12b2efb3113feedf82f4afebcc1e0 100644 (file)
@@ -708,7 +708,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
         */
        const s64 us_upper_bound = 200000;
 
-       if (!adev->mm_stats.log2_max_MBps) {
+       if ((!adev->mm_stats.log2_max_MBps) || !ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) {
                *max_bytes = 0;
                *max_vis_bytes = 0;
                return;
index a9327472c65147fb35e1e6c0f4af4942e60d17ed..b3e6b3fcdf2cb1d0167238ae342b131b7d41b8ae 100644 (file)
@@ -758,7 +758,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
                return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
        case AMDGPU_INFO_VRAM_USAGE:
-               ui64 = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
+               ui64 = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+                       ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) : 0;
                return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
        case AMDGPU_INFO_VIS_VRAM_USAGE:
                ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
@@ -804,8 +805,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                mem.vram.usable_heap_size = adev->gmc.real_vram_size -
                        atomic64_read(&adev->vram_pin_size) -
                        AMDGPU_VM_RESERVED_VRAM;
-               mem.vram.heap_usage =
-                       ttm_resource_manager_usage(vram_man);
+               mem.vram.heap_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+                               ttm_resource_manager_usage(vram_man) : 0;
                mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
 
                mem.cpu_accessible_vram.total_heap_size =
index 3328ab63376bb18a8c54b9034165d8b39a663ce3..f96beb96c75ccb4e99fd5b55e2ad191590d9c075 100644 (file)
@@ -598,8 +598,8 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
        vf2pf_info->driver_cert = 0;
        vf2pf_info->os_info.all = 0;
 
-       vf2pf_info->fb_usage =
-               ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20;
+       vf2pf_info->fb_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+                ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20 : 0;
        vf2pf_info->fb_vis_usage =
                amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20;
        vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20;