]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdkfd: Add PTL control IOCTL Option and unify refcount logic
author Perry Yuan <perry.yuan@amd.com>
Sun, 8 Feb 2026 16:42:07 +0000 (00:42 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 11 May 2026 19:55:56 +0000 (15:55 -0400)
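Introduce a new IOCTL option that gives userspace explicit control over
the Peak Tops Limiter (PTL) state for profiling. Disable requests are
recorded per process device and reference-counted per GPU, and the
profiler lock path now goes through the same request/release helpers as
the new IOCTL.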

Link: https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler-sdk
Signed-off-by: Perry Yuan <perry.yuan@amd.com>
Reviewed-by: Yifan Zhang <yifan1.zhang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/include/amdgpu_ptl.h
include/uapi/linux/kfd_ioctl.h

index 467a3dbe1bfad702cab3244764686b3cbcb7e348..aab6a4de54fa0c39ac418abd6d3a5839de9d576b 100644 (file)
@@ -2400,6 +2400,8 @@ static int gfx_v9_4_3_perf_monitor_ptl_init(struct amdgpu_device *adev, bool ena
 
        ptl->hw_supported = true;
 
+       atomic_set(&ptl->disable_ref, 0);
+
        return 0;
 }
 
index fc00d0418684c868fd9812c0ef8dee7c44161ab4..883de31df04df2f1920cfe84385b8b5aa49e1571 100644 (file)
@@ -1774,6 +1774,104 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
 }
 #endif
 
+/*
+ * Switch PTL on or off for the device behind @pdd by issuing
+ * PSP_PTL_PERF_MON_SET through the kfd2kgd ptl_ctrl hook, passing along the
+ * two formats currently stored in adev->psp.ptl.
+ *
+ * Returns -EOPNOTSUPP when the hardware is not flagged as PTL capable or no
+ * ptl_ctrl hook is installed; otherwise the hook's return value.
+ */
+static int kfd_ptl_control(struct kfd_process_device *pdd, bool enable)
+{
+       struct amdgpu_device *adev = pdd->dev->adev;
+       struct amdgpu_ptl *ptl = &adev->psp.ptl;
+       enum amdgpu_ptl_fmt fmt1 = ptl->fmt1;
+       enum amdgpu_ptl_fmt fmt2 = ptl->fmt2;
+       uint32_t state = enable;
+
+       if (!ptl->hw_supported || !pdd->dev->kfd2kgd ||
+           !pdd->dev->kfd2kgd->ptl_ctrl)
+               return -EOPNOTSUPP;
+
+       /* The hook takes state and formats by pointer, hence the local copies. */
+       return pdd->dev->kfd2kgd->ptl_ctrl(adev, PSP_PTL_PERF_MON_SET,
+                                          &state, &fmt1, &fmt2);
+}
+
+/*
+ * Record that @pdd wants PTL kept disabled on its device.
+ *
+ * Each pdd holds at most one vote (pdd->ptl_disable_req); votes are counted
+ * in ptl->disable_ref and only the first vote on a device actually turns PTL
+ * off. Everything runs under ptl->mutex. @p is unused.
+ *
+ * Returns 0 on success or when @pdd already holds a vote, otherwise the
+ * error from kfd_ptl_control(), in which case no vote is recorded.
+ */
+int kfd_ptl_disable_request(struct kfd_process_device *pdd,
+               struct kfd_process *p)
+{
+       struct amdgpu_device *adev = pdd->dev->adev;
+       struct amdgpu_ptl *ptl = &adev->psp.ptl;
+       int ret = 0;
+
+       mutex_lock(&ptl->mutex);
+
+       /* A repeated request from the same pdd is a successful no-op. */
+       if (pdd->ptl_disable_req)
+               goto out;
+
+       if (atomic_inc_return(&ptl->disable_ref) == 1) {
+               ret = kfd_ptl_control(pdd, false);
+               if (ret) {
+                       /* Undo the vote so the count matches the hardware state. */
+                       atomic_dec(&ptl->disable_ref);
+                       /*
+                        * The profiler lock path calls this for every device,
+                        * so stay quiet when PTL simply is not available.
+                        */
+                       if (ret != -EOPNOTSUPP)
+                               dev_warn(adev->dev,
+                                        "failed to disable PTL: %d\n", ret);
+                       goto out;
+               }
+       }
+       pdd->ptl_disable_req = true;
+
+out:
+       mutex_unlock(&ptl->mutex);
+       return ret;
+}
+
+/*
+ * Drop the PTL-disable vote held by @pdd, if it holds one.
+ *
+ * PTL is switched back on only when this was the last vote on the device
+ * (ptl->disable_ref reaches zero). If switching it back on fails, the count
+ * is restored and the vote stays recorded, so a later call tries again.
+ * Everything runs under ptl->mutex. @p is unused.
+ *
+ * Returns 0 on success or when @pdd holds no vote, otherwise the error from
+ * kfd_ptl_control().
+ */
+int kfd_ptl_disable_release(struct kfd_process_device *pdd,
+               struct kfd_process *p)
+{
+       struct amdgpu_ptl *ptl = &pdd->dev->adev->psp.ptl;
+       int err = 0;
+
+       mutex_lock(&ptl->mutex);
+
+       if (pdd->ptl_disable_req) {
+               /* Only the last vote leaving touches the hardware. */
+               if (atomic_dec_return(&ptl->disable_ref) == 0)
+                       err = kfd_ptl_control(pdd, true);
+
+               if (err) {
+                       atomic_inc(&ptl->disable_ref);
+                       dev_warn(pdd->dev->adev->dev,
+                                "Failed to enable PTL on release: %d\n", err);
+               } else {
+                       pdd->ptl_disable_req = false;
+               }
+       }
+
+       mutex_unlock(&ptl->mutex);
+       return err;
+}
+
+/*
+ * KFD_IOC_PROFILER_PTL_CONTROL handler: look up the process device named by
+ * args->gpu_id and either take (enable == 0) or drop (enable != 0) its
+ * PTL-disable vote.
+ *
+ * Returns -EINVAL when the gpu_id does not resolve to a usable device,
+ * otherwise the result of the request/release helper.
+ */
+static int kfd_profiler_ptl_control(struct kfd_process *p,
+               struct kfd_ioctl_ptl_control *args)
+{
+       struct kfd_process_device *pdd;
+
+       /* The lookup is done under the process mutex. */
+       mutex_lock(&p->mutex);
+       pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+       mutex_unlock(&p->mutex);
+
+       if (!(pdd && pdd->dev && pdd->dev->kfd))
+               return -EINVAL;
+
+       if (args->enable)
+               return kfd_ptl_disable_release(pdd, p);
+
+       return kfd_ptl_disable_request(pdd, p);
+}
+
 static int criu_checkpoint_process(struct kfd_process *p,
                             uint8_t __user *user_priv_data,
                             uint64_t *priv_offset)
@@ -3242,6 +3340,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p,
                if (!kfd->profiler_process) {
                        kfd->profiler_process = p;
                        status = 0;
+                       kfd_ptl_disable_request(pdd, p);
                } else if (kfd->profiler_process == p) {
                        status = -EALREADY;
                } else {
@@ -3250,6 +3349,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p,
        } else if (op == 0 && kfd->profiler_process == p) {
                kfd->profiler_process = NULL;
                status = 0;
+               kfd_ptl_disable_release(pdd, p);
        }
        mutex_unlock(&kfd->profiler_lock);
 
@@ -3292,6 +3392,8 @@ static int kfd_ioctl_profiler(struct file *filep, struct kfd_process *p, void *d
                return 0;
        case KFD_IOC_PROFILER_PMC:
                return kfd_profiler_pmc(p, &args->pmc);
+       case KFD_IOC_PROFILER_PTL_CONTROL:
+               return kfd_profiler_ptl_control(p, &args->ptl);
        }
        return -EINVAL;
 }
index 903386e0740b9b105f6cb97fe4a83fd335a6c41e..482bcfa10f82f76489c61e931bb6a04af7c07b1f 100644 (file)
@@ -872,6 +872,8 @@ struct kfd_process_device {
        bool has_reset_queue;
 
        u32 pasid;
+       /* Indicates this process has requested PTL stay disabled */
+       bool ptl_disable_req;
 };
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -1603,6 +1605,12 @@ static inline bool kfd_is_first_node(struct kfd_node *node)
        return (node == node->kfd->nodes[0]);
 }
 
+/* PTL support: take/drop a pdd's request to keep PTL disabled on its device */
+int kfd_ptl_disable_request(struct kfd_process_device *pdd,
+               struct kfd_process *p);
+int kfd_ptl_disable_release(struct kfd_process_device *pdd,
+               struct kfd_process *p);
+
 /* Debugfs */
 #if defined(CONFIG_DEBUG_FS)
 
index 1a8cb512dfe3d61a24427e49d88738676de618c1..368283d53077ecfb087c0621380b91c2caf10b2f 100644 (file)
@@ -1128,6 +1128,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
                pr_debug("Releasing pdd (topology id %d, for pid %d)\n",
                        pdd->dev->id, p->lead_thread->pid);
                kfd_process_profiler_release(p, pdd);
+
+               if (pdd->ptl_disable_req)
+                       kfd_ptl_disable_release(pdd, p);
+
                kfd_process_device_destroy_cwsr_dgpu(pdd);
                kfd_process_device_destroy_ib_mem(pdd);
 
index ffed443a14ae6b3f107fd1c81c2ae01c1431cdd1..9e63a9a9680afb05c8d5c2613b39c2c458b9d2e9 100644 (file)
@@ -39,6 +39,8 @@ struct amdgpu_ptl {
        enum amdgpu_ptl_fmt             fmt2;
        bool                            enabled;
        bool                            hw_supported;
+       /* PTL disable reference counting */
+       atomic_t                        disable_ref;
        struct mutex                    mutex;
 };
 
index cc3ed0765c83e6e72d27615fc27cc0d52d45d42d..1a94d512df3584a44baef89a559e96867a92450d 100644 (file)
@@ -1562,6 +1562,7 @@ struct kfd_ioctl_dbg_trap_args {
 enum kfd_profiler_ops {
        KFD_IOC_PROFILER_PMC = 0,
        KFD_IOC_PROFILER_VERSION = 2,
+       KFD_IOC_PROFILER_PTL_CONTROL = 3, /* argument: struct kfd_ioctl_ptl_control */
 };
 
 /**
@@ -1573,10 +1574,16 @@ struct kfd_ioctl_pmc_settings {
        __u32 perfcount_enable;   /* Force Perfcount Enable for queues on GPU */
 };
 
+struct kfd_ioctl_ptl_control {
+       __u32 gpu_id; /* user_gpu_id */
+       __u32 enable; /* 0: request PTL disabled; non-zero: drop this process's disable request */
+};
+
 struct kfd_ioctl_profiler_args {
        __u32 op;                                               /* kfd_profiler_op */
        union {
                struct kfd_ioctl_pmc_settings  pmc;
+               struct kfd_ioctl_ptl_control   ptl;     /* used when op is KFD_IOC_PROFILER_PTL_CONTROL */
                __u32 version;                          /* KFD_IOC_PROFILER_VERSION_NUM */
        };
 };