From: Tvrtko Ursulin Date: Mon, 1 Jun 2026 14:08:22 +0000 (+0100) Subject: drm/amdgpu: Fix context pstate override handling X-Git-Tag: v7.2-rc1~10^2~1^2~8 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c1dc4ccb82c9e56325d8e7514ca4c90bd1efb351;p=thirdparty%2Fkernel%2Flinux.git drm/amdgpu: Fix context pstate override handling There are several problems in the context pstate handling code. The most serious ones are potential use-after-free and NULL pointer dereferences at context initialization time. Both are due to amdgpu_ctx_init() not holding the adev->pm.stable_pstate_ctx_lock, which is otherwise used from both sysfs and the context code itself for modifying and clearing the stored context pointer. The second issue is that context fini can trample over the pstate configuration set via sysfs. This is due to the restore state (ctx->stable_pstate) being saved at context init time, and not if, or when, the context actually changes the pstate. As the context exits it will therefore incorrectly restore to what was set before the sysfs override was requested. The simplest fix is to drastically simplify how the state is tracked, by clearly defining the points at which pstate ownership is taken and released, and to handle all transitions under the correct lock. Instead of at context init time, the previous state is saved only at the point the context overrides the current state, and is restored on context exit only if the context is still the owner of the current override state. 
Signed-off-by: Tvrtko Ursulin Fixes: 79610d304133 ("drm/amdgpu: fix pstate setting issue") Cc: Chengming Gui Cc: Alex Deucher Cc: "Christian König" Signed-off-by: Alex Deucher (cherry picked from commit 1b5e413713c0a93bc1818394d0ce49aaad21bd27) Cc: # v6.1+ --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 0d7f6cd74f79b..ce35b415093d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -326,7 +326,6 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, struct drm_file *filp, struct amdgpu_ctx *ctx) { struct amdgpu_fpriv *fpriv = filp->driver_priv; - u32 current_stable_pstate; int r; r = amdgpu_ctx_priority_permit(filp, priority); @@ -344,36 +343,21 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm); ctx->init_priority = priority; ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; - - r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate); - if (r) - return r; - - if (mgr->adev->pm.stable_pstate_ctx) - ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate; - else - ctx->stable_pstate = current_stable_pstate; + ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; return 0; } -static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, - u32 stable_pstate) +static int __amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, + u32 stable_pstate) { struct amdgpu_device *adev = ctx->mgr->adev; enum amd_dpm_forced_level level; + struct amdgpu_ctx *current_ctx; u32 current_stable_pstate; - int r; + int r = 0; - mutex_lock(&adev->pm.stable_pstate_ctx_lock); - if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) { - r = -EBUSY; - goto done; - } - - r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate); - if (r || (stable_pstate == current_stable_pstate)) - goto done; + lockdep_assert_held(&adev->pm.stable_pstate_ctx_lock); switch 
(stable_pstate) { case AMDGPU_CTX_STABLE_PSTATE_NONE: @@ -392,17 +376,41 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; break; default: - r = -EINVAL; - goto done; + return -EINVAL; } + current_ctx = adev->pm.stable_pstate_ctx; + if (current_ctx && current_ctx != ctx) + return -EBUSY; + + r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate); + if (r || current_stable_pstate == stable_pstate) + return r; + r = amdgpu_dpm_force_performance_level(adev, level); + if (r) + return r; - if (level == AMD_DPM_FORCED_LEVEL_AUTO) - adev->pm.stable_pstate_ctx = NULL; - else + if (!current_ctx) { adev->pm.stable_pstate_ctx = ctx; -done: + /* + * Serialized by context taking ownership for the first time + * while holding adev->pm.stable_pstate_ctx_lock). + */ + WRITE_ONCE(ctx->stable_pstate, current_stable_pstate); + } + + return 0; +} + +static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, + u32 stable_pstate) +{ + struct amdgpu_device *adev = ctx->mgr->adev; + int r; + + mutex_lock(&adev->pm.stable_pstate_ctx_lock); + r = __amdgpu_ctx_set_stable_pstate(ctx, stable_pstate); mutex_unlock(&adev->pm.stable_pstate_ctx_lock); return r; @@ -428,7 +436,12 @@ static void amdgpu_ctx_fini(struct kref *ref) } if (drm_dev_enter(adev_to_drm(adev), &idx)) { - amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate); + mutex_lock(&adev->pm.stable_pstate_ctx_lock); + if (adev->pm.stable_pstate_ctx == ctx) { + __amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate); + adev->pm.stable_pstate_ctx = NULL; + } + mutex_unlock(&adev->pm.stable_pstate_ctx_lock); drm_dev_exit(idx); }