git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu: Fix context pstate override handling
author Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Mon, 1 Jun 2026 14:08:22 +0000 (15:08 +0100)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 17 Jun 2026 22:26:40 +0000 (18:26 -0400)
There are several problems in the context pstate handling code.

The most serious ones are potential use-after-free and NULL pointer
dereferences at context initialization time. Both are due to
amdgpu_ctx_init() not holding the adev->pm.stable_pstate_ctx_lock, which
is otherwise used from both sysfs and the context code itself for
modifying and clearing the stored context pointer.

The second issue is that context fini can trample over the pstate
configuration set via sysfs. This is due to the restore state
(ctx->stable_pstate) being saved at context init time, rather than if and
when the context actually changes the pstate. As the context exits it will
therefore incorrectly restore to what was set before the sysfs override
was requested.

The simplest fix is to drastically simplify how the state is tracked, by
clearly defining the points at which pstate ownership is taken and
released, and to handle all transitions under the correct lock.

Instead of at context init time, the previous state is saved only at the
point the context overrides the current state, and is restored on context
exit only if the context is still the owner of the current override state.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Fixes: 79610d304133 ("drm/amdgpu: fix pstate setting issue")
Cc: Chengming Gui <Jack.Gui@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Christian König" <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 1b5e413713c0a93bc1818394d0ce49aaad21bd27)
Cc: <stable@vger.kernel.org> # v6.1+
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

index 0d7f6cd74f79be2ea3ad0afcc3aa49a572a5fe34..ce35b415093d670aa677067973fbd00e9e03b99c 100644 (file)
@@ -326,7 +326,6 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
                           struct drm_file *filp, struct amdgpu_ctx *ctx)
 {
        struct amdgpu_fpriv *fpriv = filp->driver_priv;
-       u32 current_stable_pstate;
        int r;
 
        r = amdgpu_ctx_priority_permit(filp, priority);
@@ -344,36 +343,21 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
        ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm);
        ctx->init_priority = priority;
        ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
-
-       r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
-       if (r)
-               return r;
-
-       if (mgr->adev->pm.stable_pstate_ctx)
-               ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate;
-       else
-               ctx->stable_pstate = current_stable_pstate;
+       ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
 
        return 0;
 }
 
-static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
-                                       u32 stable_pstate)
+static int __amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
+                                         u32 stable_pstate)
 {
        struct amdgpu_device *adev = ctx->mgr->adev;
        enum amd_dpm_forced_level level;
+       struct amdgpu_ctx *current_ctx;
        u32 current_stable_pstate;
-       int r;
+       int r = 0;
 
-       mutex_lock(&adev->pm.stable_pstate_ctx_lock);
-       if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
-               r = -EBUSY;
-               goto done;
-       }
-
-       r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
-       if (r || (stable_pstate == current_stable_pstate))
-               goto done;
+       lockdep_assert_held(&adev->pm.stable_pstate_ctx_lock);
 
        switch (stable_pstate) {
        case AMDGPU_CTX_STABLE_PSTATE_NONE:
@@ -392,17 +376,41 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
                level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
                break;
        default:
-               r = -EINVAL;
-               goto done;
+               return -EINVAL;
        }
 
+       current_ctx = adev->pm.stable_pstate_ctx;
+       if (current_ctx && current_ctx != ctx)
+               return -EBUSY;
+
+       r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
+       if (r || current_stable_pstate == stable_pstate)
+               return r;
+
        r = amdgpu_dpm_force_performance_level(adev, level);
+       if (r)
+               return r;
 
-       if (level == AMD_DPM_FORCED_LEVEL_AUTO)
-               adev->pm.stable_pstate_ctx = NULL;
-       else
+       if (!current_ctx) {
                adev->pm.stable_pstate_ctx = ctx;
-done:
+               /*
+                * Serialized by context taking ownership for the first time
+                * while holding adev->pm.stable_pstate_ctx_lock.
+                */
+               WRITE_ONCE(ctx->stable_pstate, current_stable_pstate);
+       }
+
+       return 0;
+}
+
+static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
+                                       u32 stable_pstate)
+{
+       struct amdgpu_device *adev = ctx->mgr->adev;
+       int r;
+
+       mutex_lock(&adev->pm.stable_pstate_ctx_lock);
+       r = __amdgpu_ctx_set_stable_pstate(ctx, stable_pstate);
        mutex_unlock(&adev->pm.stable_pstate_ctx_lock);
 
        return r;
@@ -428,7 +436,12 @@ static void amdgpu_ctx_fini(struct kref *ref)
        }
 
        if (drm_dev_enter(adev_to_drm(adev), &idx)) {
-               amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
+               mutex_lock(&adev->pm.stable_pstate_ctx_lock);
+               if (adev->pm.stable_pstate_ctx == ctx) {
+                       __amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
+                       adev->pm.stable_pstate_ctx = NULL;
+               }
+               mutex_unlock(&adev->pm.stable_pstate_ctx_lock);
                drm_dev_exit(idx);
        }