From: Perry Yuan
Date: Sun, 8 Feb 2026 16:42:07 +0000 (+0800)
Subject: drm/amdkfd: Add PTL control IOCTL Option and unify refcount logic
X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=dd61e27535a6f5cfb32a847b282d2e3d5aebf46f;p=thirdparty%2Fkernel%2Flinux.git

drm/amdkfd: Add PTL control IOCTL Option and unify refcount logic

Introduce a new IOCTL option to allow userspace explicit control
over the Peak Tops Limiter (PTL) state for profiling

Link: https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler-sdk
Signed-off-by: Perry Yuan
Reviewed-by: Yifan Zhang
Acked-by: Alex Deucher
Signed-off-by: Alex Deucher
---

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 467a3dbe1bfa..aab6a4de54fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -2400,6 +2400,8 @@ static int gfx_v9_4_3_perf_monitor_ptl_init(struct amdgpu_device *adev, bool ena
 
 	ptl->hw_supported = true;
 
+	atomic_set(&ptl->disable_ref, 0);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index fc00d0418684..883de31df04d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1774,6 +1774,133 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
 }
 #endif
 
+/*
+ * Switch the Peak Tops Limiter on or off through the kfd2kgd ptl_ctrl hook,
+ * passing along the formats currently stored in adev->psp.ptl.
+ *
+ * Returns -EOPNOTSUPP when PTL is not flagged as hardware-supported or the
+ * hook is missing, otherwise whatever ptl_ctrl() returns.
+ */
+static int kfd_ptl_control(struct kfd_process_device *pdd, bool enable)
+{
+	struct amdgpu_device *adev = pdd->dev->adev;
+	struct amdgpu_ptl *ptl = &adev->psp.ptl;
+	enum amdgpu_ptl_fmt pref_format1 = ptl->fmt1;
+	enum amdgpu_ptl_fmt pref_format2 = ptl->fmt2;
+	uint32_t ptl_state = enable ? 1 : 0;
+
+	if (!ptl->hw_supported)
+		return -EOPNOTSUPP;
+
+	if (!pdd->dev->kfd2kgd || !pdd->dev->kfd2kgd->ptl_ctrl)
+		return -EOPNOTSUPP;
+
+	return pdd->dev->kfd2kgd->ptl_ctrl(adev, PSP_PTL_PERF_MON_SET,
+					   &ptl_state,
+					   &pref_format1,
+					   &pref_format2);
+}
+
+/*
+ * Take this pdd's reference on "PTL disabled" for the device.
+ *
+ * Each pdd holds at most one reference (tracked by pdd->ptl_disable_req), so
+ * repeated requests are no-ops that return 0. Only the first reference on
+ * the device actually disables PTL; if that fails the reference is dropped
+ * again and the error is returned. @p is currently unused.
+ */
+int kfd_ptl_disable_request(struct kfd_process_device *pdd,
+			    struct kfd_process *p)
+{
+	struct amdgpu_device *adev = pdd->dev->adev;
+	struct amdgpu_ptl *ptl = &adev->psp.ptl;
+	int ret = 0;
+
+	mutex_lock(&ptl->mutex);
+
+	if (pdd->ptl_disable_req)
+		goto out;
+
+	if (atomic_inc_return(&ptl->disable_ref) == 1) {
+		ret = kfd_ptl_control(pdd, false);
+		if (ret) {
+			atomic_dec(&ptl->disable_ref);
+			dev_warn(adev->dev,
+				 "Failed to disable PTL: %d\n", ret);
+			goto out;
+		}
+	}
+	pdd->ptl_disable_req = true;
+
+out:
+	mutex_unlock(&ptl->mutex);
+	return ret;
+}
+
+/*
+ * Drop this pdd's reference on "PTL disabled", if it holds one.
+ *
+ * PTL is re-enabled when the last reference on the device goes away. If the
+ * re-enable fails, the reference and pdd->ptl_disable_req are left in place
+ * and the error is returned. @p is currently unused.
+ */
+int kfd_ptl_disable_release(struct kfd_process_device *pdd,
+			    struct kfd_process *p)
+{
+	struct amdgpu_device *adev = pdd->dev->adev;
+	struct amdgpu_ptl *ptl = &adev->psp.ptl;
+	int ret = 0;
+
+	mutex_lock(&ptl->mutex);
+
+	if (!pdd->ptl_disable_req)
+		goto out;
+
+	if (atomic_dec_return(&ptl->disable_ref) == 0) {
+		ret = kfd_ptl_control(pdd, true);
+		if (ret) {
+			atomic_inc(&ptl->disable_ref);
+			dev_warn(adev->dev,
+				 "Failed to enable PTL on release: %d\n", ret);
+			goto out;
+		}
+	}
+	pdd->ptl_disable_req = false;
+
+out:
+	mutex_unlock(&ptl->mutex);
+	return ret;
+}
+
+/*
+ * KFD_IOC_PROFILER_PTL_CONTROL handler: enable == 0 takes a PTL-disable
+ * reference for the pdd matching args->gpu_id, enable == 1 drops it again.
+ *
+ * Returns -EINVAL for any other enable value or when no usable pdd is found
+ * for gpu_id.
+ */
+static int kfd_profiler_ptl_control(struct kfd_process *p,
+				    struct kfd_ioctl_ptl_control *args)
+{
+	struct kfd_process_device *pdd;
+
+	/* Only 0 and 1 are defined; keep other values free for future use. */
+	if (args->enable > 1)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+	mutex_unlock(&p->mutex);
+
+	if (!pdd || !pdd->dev || !pdd->dev->kfd)
+		return -EINVAL;
+
+	if (args->enable)
+		return kfd_ptl_disable_release(pdd, p);
+
+	return kfd_ptl_disable_request(pdd, p);
+}
+
 static int criu_checkpoint_process(struct kfd_process *p,
 			     uint8_t __user *user_priv_data,
 			     uint64_t *priv_offset)
@@ -3242,6 +3369,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p,
 		if (!kfd->profiler_process) {
 			kfd->profiler_process = p;
 			status = 0;
+			kfd_ptl_disable_request(pdd, p);
 		} else if (kfd->profiler_process == p) {
 			status = -EALREADY;
 		} else {
@@ -3250,6 +3378,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p,
 	} else if (op == 0 && kfd->profiler_process == p) {
 		kfd->profiler_process = NULL;
 		status = 0;
+		kfd_ptl_disable_release(pdd, p);
 	}
 	mutex_unlock(&kfd->profiler_lock);
 
@@ -3292,6 +3421,8 @@ static int kfd_ioctl_profiler(struct file *filep, struct kfd_process *p, void *d
 		return 0;
 	case KFD_IOC_PROFILER_PMC:
 		return kfd_profiler_pmc(p, &args->pmc);
+	case KFD_IOC_PROFILER_PTL_CONTROL:
+		return kfd_profiler_ptl_control(p, &args->ptl);
 	}
 	return -EINVAL;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 903386e0740b..482bcfa10f82 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -872,6 +872,8 @@ struct kfd_process_device {
 	bool has_reset_queue;
 
 	u32 pasid;
+	/* Indicates this process has requested PTL stay disabled */
+	bool ptl_disable_req;
 };
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -1603,6 +1605,12 @@ static inline bool kfd_is_first_node(struct kfd_node *node)
 	return (node == node->kfd->nodes[0]);
 }
 
+/* PTL support */
+int kfd_ptl_disable_request(struct kfd_process_device *pdd,
+			    struct kfd_process *p);
+int kfd_ptl_disable_release(struct kfd_process_device *pdd,
+			    struct kfd_process *p);
+
 /* Debugfs */
 #if defined(CONFIG_DEBUG_FS)
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 1a8cb512dfe3..368283d53077 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1128,6 +1128,15 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 		pr_debug("Releasing pdd (topology id %d, for pid %d)\n",
 			 pdd->dev->id, p->lead_thread->pid);
 		kfd_process_profiler_release(p, pdd);
+
+		/*
+		 * NOTE(review): when this release fails, the device-wide
+		 * disable reference is kept although the pdd is going away;
+		 * confirm whether teardown should drop it unconditionally.
+		 */
+		if (pdd->ptl_disable_req)
+			kfd_ptl_disable_release(pdd, p);
+
 		kfd_process_device_destroy_cwsr_dgpu(pdd);
 		kfd_process_device_destroy_ib_mem(pdd);
 
diff --git a/drivers/gpu/drm/amd/include/amdgpu_ptl.h b/drivers/gpu/drm/amd/include/amdgpu_ptl.h
index ffed443a14ae..9e63a9a9680a 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_ptl.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_ptl.h
@@ -39,6 +39,8 @@ struct amdgpu_ptl {
 	enum amdgpu_ptl_fmt fmt2;
 	bool enabled;
 	bool hw_supported;
+	/* PTL disable reference counting */
+	atomic_t disable_ref;
 	struct mutex mutex;
 };
 
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index cc3ed0765c83..1a94d512df35 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -1562,6 +1562,7 @@ struct kfd_ioctl_dbg_trap_args {
 enum kfd_profiler_ops {
 	KFD_IOC_PROFILER_PMC = 0,
 	KFD_IOC_PROFILER_VERSION = 2,
+	KFD_IOC_PROFILER_PTL_CONTROL = 3,
 };
 
 /**
@@ -1573,10 +1574,16 @@ struct kfd_ioctl_pmc_settings {
 	__u32 perfcount_enable; /* Force Perfcount Enable for queues on GPU */
 };
 
+struct kfd_ioctl_ptl_control {
+	__u32 gpu_id;	/* user_gpu_id */
+	__u32 enable;	/* set 1 to enable PTL, set 0 to disable PTL */
+};
+
 struct kfd_ioctl_profiler_args {
 	__u32 op;	/* kfd_profiler_op */
 	union {
 		struct kfd_ioctl_pmc_settings pmc;
+		struct kfd_ioctl_ptl_control ptl;
 		__u32 version;	/* KFD_IOC_PROFILER_VERSION_NUM */
 	};
 };