]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
amd/amdkfd: Add kfd_ioctl_profiler to contain profiler kernel driver changes
author: Benjamin Welton <bewelton@amd.com>
Sun, 8 Feb 2026 16:42:00 +0000 (00:42 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Mon, 11 May 2026 19:55:55 +0000 (15:55 -0400)
kfd_ioctl_profiler takes a similar approach to that of
kfd_ioctl_dbg_trap (which contains debugger related IOCTL
services) where kfd_ioctl_profiler will contain all profiler
related IOCTL services. The IOCTL is designed to be expanded
as needed to support additional profiler functionality.

The current functionality of the IOCTL is to allow for profilers
which need PMC counters from GPU devices to both signal to other
profilers that may be on the system that the device has active PMC
profiling taking place on it (multiple PMC profilers on the same
device can result in corrupted counter data) and to setup the device
to allow for the collection of SQ PMC data on all queues on the device.

For PMC data for the SQ block (such as SQ_WAVES) to be available
to a profiler, mmPERFCOUNT_ENABLE must be set on the queues. When
profiling a single process, the profiler can inject PM4 packets into
each queue to turn on PERFCOUNT_ENABLE. When profiling system wide,
the profiler does not have this option and must have a way to turn
on profiling for queues into which it cannot inject packets directly.

Accomplishing this requires a few steps:

1. Checking if the user has the necessary permissions to profile system
   wide on the device. This check uses the same check that linux perf
   uses to determine if a user has the necessary permissions to profile
   at this scope (primarily if the process has CAP_SYS_PERFMON or is root).

2. Locking the device for profiling. This is done by setting a lock bit
   on the device struct and storing the process that locked the device.

3. Iterating all queues on the device and issuing an MQD Update to enable
   perfcounting on the queues.

4. Actions to cleanup if the process exits or releases the lock.

The IOCTL also contains a link to the existing PC Sampling IOCTL as well.
This is per a suggestion that we should potentially remove the PC Sampling
IOCTL to have it be a part of the profiler IOCTL. This is a future change.
In addition, we do expect to expand the profiler IOCTL to include
additional profiler functionality in the future (which necessitates the
use of a version number).

v2: squash in proper IOCTL number

Proposed userspace support:
https://github.com/ROCm/rocm-systems/commit/40abc95a6463a61bb318a67efd6d9cc3e5ee8839

Signed-off-by: Benjamin Welton <benjamin.welton@amd.com>
Signed-off-by: Perry Yuan <perry.yuan@amd.com>
Acked-by: Kent Russell <kent.russell@amd.com>
Reviewed-by: Yifan Zhang <yifan1.zhang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
12 files changed:
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process.c
include/uapi/linux/kfd_ioctl.h

index dd27d7ba2ee2d1102d55d2b18dfa8550f2addece..d18ec3671fda858b5f61584fcc0c46736107e94d 100644 (file)
@@ -21,6 +21,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include <linux/capability.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/fs.h>
@@ -3216,6 +3217,84 @@ static int kfd_ioctl_create_process(struct file *filep, struct kfd_process *p, v
        return 0;
 }
 
+/**
+ * profile_lock_device - acquire or release the per-device profiler lock
+ * @p: process issuing the request
+ * @gpu_id: GPU id used to look up the process's device data
+ * @op: 1 to take ownership of the device for profiling, 0 to give it up
+ *
+ * Ownership is recorded in kfd->profiler_process and is only read or
+ * written here while kfd->profiler_lock is held.
+ *
+ * Return: 0 on success, -EALREADY if @p already owns the device, -EBUSY if
+ * another process owns it, -EINVAL if the device cannot be found, @op is
+ * neither 0 nor 1, or @p tries to release a device it does not own.
+ */
+static inline int profile_lock_device(struct kfd_process *p,
+                                      uint32_t gpu_id, uint32_t op)
+{
+       struct kfd_process_device *pdd;
+       struct kfd_dev *kfd;
+       /* Default covers an unknown op and a release by a non-owner. */
+       int status = -EINVAL;
+
+       if (!p)
+               return -EINVAL;
+
+       mutex_lock(&p->mutex);
+       pdd = kfd_process_device_data_by_id(p, gpu_id);
+       mutex_unlock(&p->mutex);
+
+       if (!pdd || !pdd->dev || !pdd->dev->kfd)
+               return -EINVAL;
+
+       kfd = pdd->dev->kfd;
+
+       mutex_lock(&kfd->profiler_lock);
+       if (op == 1) {
+               if (!kfd->profiler_process) {
+                       kfd->profiler_process = p;
+                       status = 0;
+               } else if (kfd->profiler_process == p) {
+                       status = -EALREADY;
+               } else {
+                       status = -EBUSY;
+               }
+       } else if (op == 0 && kfd->profiler_process == p) {
+               kfd->profiler_process = NULL;
+               status = 0;
+       }
+       mutex_unlock(&kfd->profiler_lock);
+
+       return status;
+}
+
+/**
+ * kfd_profiler_pmc - handle the KFD_IOC_PROFILER_PMC operation
+ * @p: process issuing the ioctl
+ * @args: PMC settings (gpu_id, lock, perfcount_enable) from the ioctl data
+ *
+ * Changes the profiler lock state of the device according to @args->lock
+ * and, if that succeeds, asks the device queue manager to apply
+ * @args->perfcount_enable to the queues on the device.
+ *
+ * Return: 0 on success, -EPERM if the caller is not perfmon_capable(),
+ * -EINVAL if the device cannot be resolved, otherwise the error returned
+ * by profile_lock_device().
+ */
+static inline int kfd_profiler_pmc(struct kfd_process *p,
+                                  struct kfd_ioctl_pmc_settings *args)
+{
+       struct kfd_process_device *pdd;
+       struct device_queue_manager *dqm = NULL;
+       int status;
+
+       /* Check if we have the correct permissions. */
+       if (!perfmon_capable())
+               return -EPERM;
+
+       if (!p)
+               return -EINVAL;
+
+       /*
+        * Resolve the queue manager before touching the profiler lock so a
+        * failed lookup cannot leave the lock state changed, and so the
+        * lookup result is never dereferenced unchecked.
+        */
+       mutex_lock(&p->mutex);
+       pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+       if (pdd && pdd->dev)
+               dqm = pdd->dev->dqm;
+       mutex_unlock(&p->mutex);
+
+       if (!dqm)
+               return -EINVAL;
+
+       /* Lock/Unlock the device based on the parameter given in OP */
+       status = profile_lock_device(p, args->gpu_id, args->lock);
+       if (status != 0)
+               return status;
+
+       /*
+        * Enable/disable perfcount if requested.
+        * NOTE(review): this also runs after a successful unlock (lock == 0),
+        * so a caller can release the device and still leave perfcount
+        * enabled - confirm whether a release should force a disable.
+        */
+       dqm->ops.set_perfcount(dqm, args->perfcount_enable);
+       return status;
+}
+
+/*
+ * Entry point for AMDKFD_IOC_PROFILER: dispatches on the op field of the
+ * ioctl argument. KFD_IOC_PROFILER_VERSION writes the profiler interface
+ * version back into the argument; KFD_IOC_PROFILER_PMC forwards the pmc
+ * settings to kfd_profiler_pmc(). Any other op yields -EINVAL.
+ */
+static int kfd_ioctl_profiler(struct file *filep, struct kfd_process *p, void *data)
+{
+       struct kfd_ioctl_profiler_args *req = data;
+
+       if (req->op == KFD_IOC_PROFILER_VERSION) {
+               req->version = KFD_IOC_PROFILER_VERSION_NUM;
+               return 0;
+       }
+
+       if (req->op == KFD_IOC_PROFILER_PMC)
+               return kfd_profiler_pmc(p, &req->pmc);
+
+       return -EINVAL;
+}
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
        [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
                            .validate = NULL, .cmd_drv = 0, .name = #ioctl}
@@ -3342,6 +3421,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
        AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_PROCESS,
                        kfd_ioctl_create_process, 0),
+
+       AMDKFD_IOCTL_DEF(AMDKFD_IOC_PROFILER,
+                       kfd_ioctl_profiler, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT        ARRAY_SIZE(amdkfd_ioctls)
index b7f8f7ff819834d23989e1f993d0d9e5060f4f26..d649d8603e28e07a0bb64e807aedca4cc12c64d0 100644 (file)
@@ -936,6 +936,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 
        svm_range_set_max_pages(kfd->adev);
 
+       kfd->profiler_process = NULL;
+       mutex_init(&kfd->profiler_lock);
+
        kfd->init_complete = true;
        dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor,
                 kfd->adev->pdev->device);
@@ -971,6 +974,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
                ida_destroy(&kfd->doorbell_ida);
                kfd_gtt_sa_fini(kfd);
                amdgpu_amdkfd_free_kernel_mem(kfd->adev, &kfd->gtt_mem);
+               mutex_destroy(&kfd->profiler_lock);
        }
 
        kfree(kfd);
index a9ac575537e59600eff9904f7d61ef9105e858a6..c64a1e19fa3f7fe241b00c944b352bce4fb89425 100644 (file)
@@ -324,6 +324,29 @@ static int remove_queue_mes_on_reset_option(struct device_queue_manager *dqm, st
        return r;
 }
 
+/*
+ * Issue a perfcount enable/disable MQD update for every queue found on the
+ * queues_list of every entry in dqm->queues. Only @enable == 1 selects the
+ * enable flag; every other value selects the disable flag. A NULL @dqm is
+ * silently ignored.
+ */
+static void set_perfcount(struct device_queue_manager *dqm, int enable)
+{
+       struct mqd_update_info update = { 0 };
+       struct device_process_node *node;
+       struct queue *queue;
+
+       if (!dqm)
+               return;
+
+       if (enable == 1)
+               update.update_flag = UPDATE_FLAG_PERFCOUNT_ENABLE;
+       else
+               update.update_flag = UPDATE_FLAG_PERFCOUNT_DISABLE;
+
+       /*
+        * NOTE(review): the result of pqm_update_mqd() is dropped, and it is
+        * called with the dqm lock held - confirm it does not take that lock
+        * itself.
+        */
+       dqm_lock(dqm);
+       list_for_each_entry(node, &dqm->queues, list) {
+               list_for_each_entry(queue, &node->qpd->queues_list, list)
+                       pqm_update_mqd(node->qpd->pqm,
+                                      queue->properties.queue_id, &update);
+       }
+       dqm_unlock(dqm);
+}
+
 static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
                            struct qcm_process_device *qpd)
 {
@@ -3113,6 +3136,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
                dqm->ops.reset_queues = reset_queues_cpsch;
                dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
                dqm->ops.checkpoint_mqd = checkpoint_mqd;
+               dqm->ops.set_perfcount = set_perfcount;
                break;
        case KFD_SCHED_POLICY_NO_HWS:
                /* initialize dqm for no cp scheduling */
@@ -3133,6 +3157,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
                dqm->ops.get_wave_state = get_wave_state;
                dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
                dqm->ops.checkpoint_mqd = checkpoint_mqd;
+               dqm->ops.set_perfcount = set_perfcount;
                break;
        default:
                dev_err(dev->adev->dev, "Invalid scheduling policy %d\n", dqm->sched_policy);
index a0323501c6b9660c6be59578671916bb26b501c8..e0b6a47e7722b9b66ea7e25ec3c9a7e6744cb5a9 100644 (file)
@@ -199,6 +199,8 @@ struct device_queue_manager_ops {
                                  const struct queue *q,
                                  void *mqd,
                                  void *ctl_stack);
+       void    (*set_perfcount)(struct device_queue_manager *dqm,
+                                 int enable);
 };
 
 struct device_queue_manager_asic_ops {
index 77fb41e2486a46e82d7ceb4c8635e6c624cab9dc..8e8ec266ca46d670cd1c77cce67adc15f3e9a707 100644 (file)
@@ -123,10 +123,9 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
         */
        m->cp_hqd_hq_scheduler0 = 1 << 14;
 
-       if (q->format == KFD_QUEUE_FORMAT_AQL) {
+       if (q->format == KFD_QUEUE_FORMAT_AQL)
                m->cp_hqd_aql_control =
                        1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
-       }
 
        if (mm->dev->kfd->cwsr_enabled) {
                m->cp_hqd_persistent_state |=
@@ -141,6 +140,12 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
                m->cp_hqd_wg_state_offset = q->ctl_stack_size;
        }
 
+       mutex_lock(&mm->dev->kfd->profiler_lock);
+       if (mm->dev->kfd->profiler_process != NULL)
+               m->compute_perfcount_enable = 1;
+
+       mutex_unlock(&mm->dev->kfd->profiler_lock);
+
        *mqd = m;
        if (gart_addr)
                *gart_addr = addr;
@@ -220,6 +225,13 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
        if (mm->dev->kfd->cwsr_enabled)
                m->cp_hqd_ctx_save_control = 0;
 
+       if (minfo) {
+               if (minfo->update_flag == UPDATE_FLAG_PERFCOUNT_ENABLE)
+                       m->compute_perfcount_enable = 1;
+               else if (minfo->update_flag == UPDATE_FLAG_PERFCOUNT_DISABLE)
+                       m->compute_perfcount_enable = 0;
+       }
+
        update_cu_mask(mm, mqd, minfo);
        set_priority(m, q);
 
index a1e3cf2384dd3b6574029e01fbf882f9acdb615f..7568e7ed5244dd4727920e4e3512641d5b897c9f 100644 (file)
@@ -163,10 +163,9 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
        if (amdgpu_amdkfd_have_atomics_support(mm->dev->adev))
                m->cp_hqd_hq_status0 |= 1 << 29;
 
-       if (q->format == KFD_QUEUE_FORMAT_AQL) {
+       if (q->format == KFD_QUEUE_FORMAT_AQL)
                m->cp_hqd_aql_control =
                        1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
-       }
 
        if (mm->dev->kfd->cwsr_enabled) {
                m->cp_hqd_persistent_state |=
@@ -181,6 +180,11 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
                m->cp_hqd_wg_state_offset = q->ctl_stack_size;
        }
 
+       mutex_lock(&mm->dev->kfd->profiler_lock);
+       if (mm->dev->kfd->profiler_process != NULL)
+               m->compute_perfcount_enable = 1;
+       mutex_unlock(&mm->dev->kfd->profiler_lock);
+
        *mqd = m;
        if (gart_addr)
                *gart_addr = addr;
@@ -258,6 +262,12 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
        }
        if (mm->dev->kfd->cwsr_enabled)
                m->cp_hqd_ctx_save_control = 0;
+       if (minfo) {
+               if (minfo->update_flag == UPDATE_FLAG_PERFCOUNT_ENABLE)
+                       m->compute_perfcount_enable = 1;
+               else if (minfo->update_flag == UPDATE_FLAG_PERFCOUNT_DISABLE)
+                       m->compute_perfcount_enable = 0;
+       }
 
        update_cu_mask(mm, mqd, minfo);
        set_priority(m, q);
index b3e122d7876e08ce8fdc10c08b6132e8030dec24..8c815f129614c7ef13e528492f36c090623ab162 100644 (file)
@@ -138,10 +138,9 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
        if (amdgpu_amdkfd_have_atomics_support(mm->dev->adev))
                m->cp_hqd_hq_status0 |= 1 << 29;
 
-       if (q->format == KFD_QUEUE_FORMAT_AQL) {
+       if (q->format == KFD_QUEUE_FORMAT_AQL)
                m->cp_hqd_aql_control =
                        1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
-       }
 
        if (mm->dev->kfd->cwsr_enabled) {
                m->cp_hqd_persistent_state |=
@@ -156,6 +155,11 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
                m->cp_hqd_wg_state_offset = q->ctl_stack_size;
        }
 
+       mutex_lock(&mm->dev->kfd->profiler_lock);
+       if (mm->dev->kfd->profiler_process != NULL)
+               m->compute_perfcount_enable = 1;
+       mutex_unlock(&mm->dev->kfd->profiler_lock);
+
        *mqd = m;
        if (gart_addr)
                *gart_addr = addr;
index e8f97de9d6e476da315b6f14ed4aa7cc2916e78d..56a7679ca98d57b7642a03197fa9596e53af84b2 100644 (file)
@@ -227,10 +227,9 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
                m->cp_hqd_aql_control =
                        1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
 
-       if (q->tba_addr) {
+       if (q->tba_addr)
                m->compute_pgm_rsrc2 |=
                        (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
-       }
 
        if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) {
                m->cp_hqd_persistent_state |=
@@ -245,6 +244,11 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
                m->cp_hqd_wg_state_offset = q->ctl_stack_size;
        }
 
+       mutex_lock(&mm->dev->kfd->profiler_lock);
+       if (mm->dev->kfd->profiler_process != NULL)
+               m->compute_perfcount_enable = 1;
+       mutex_unlock(&mm->dev->kfd->profiler_lock);
+
        *mqd = m;
        if (gart_addr)
                *gart_addr = addr;
@@ -327,6 +331,13 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
        if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address)
                m->cp_hqd_ctx_save_control = 0;
 
+       if (minfo) {
+               if (minfo->update_flag == UPDATE_FLAG_PERFCOUNT_ENABLE)
+                       m->compute_perfcount_enable = 1;
+               else if (minfo->update_flag == UPDATE_FLAG_PERFCOUNT_DISABLE)
+                       m->compute_perfcount_enable = 0;
+       }
+
        if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
            KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
            KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0))
index 431a20323146bf9cd48094a862df4f8a4593fa49..c86779af323bb5e7895feb4128911538ca0eb3f9 100644 (file)
@@ -148,6 +148,11 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
                m->cp_hqd_wg_state_offset = q->ctl_stack_size;
        }
 
+       mutex_lock(&mm->dev->kfd->profiler_lock);
+       if (mm->dev->kfd->profiler_process != NULL)
+               m->compute_perfcount_enable = 1;
+       mutex_unlock(&mm->dev->kfd->profiler_lock);
+
        *mqd = m;
        if (gart_addr)
                *gart_addr = addr;
@@ -230,6 +235,12 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
                m->cp_hqd_ctx_save_control =
                        atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
                        mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;
+       if (minfo) {
+               if (minfo->update_flag == UPDATE_FLAG_PERFCOUNT_ENABLE)
+                       m->compute_perfcount_enable = 1;
+               else if (minfo->update_flag == UPDATE_FLAG_PERFCOUNT_DISABLE)
+                       m->compute_perfcount_enable = 0;
+       }
 
        update_cu_mask(mm, mqd, minfo);
        set_priority(m, q);
index 9fe5c66d8013ad91495caf832a3ab7e0ae7c5bf9..903386e0740b9b105f6cb97fe4a83fd335a6c41e 100644 (file)
@@ -383,6 +383,11 @@ struct kfd_dev {
        int kfd_dev_lock;
 
        atomic_t kfd_processes_count;
+
+       /* Lock for profiler process */
+       struct mutex profiler_lock;
+       /* Process currently holding the lock */
+       struct kfd_process *profiler_process;
 };
 
 enum kfd_mempool {
@@ -556,6 +561,8 @@ enum mqd_update_flag {
        UPDATE_FLAG_DBG_WA_ENABLE = 1,
        UPDATE_FLAG_DBG_WA_DISABLE = 2,
        UPDATE_FLAG_IS_GWS = 4, /* quirk for gfx9 IP */
+       UPDATE_FLAG_PERFCOUNT_ENABLE = 5,
+       UPDATE_FLAG_PERFCOUNT_DISABLE = 6,
 };
 
 struct mqd_update_info {
index 9228e4a949ed1cf9d366ecd72b568ed069c3eef0..1a8cb512dfe3d61a24427e49d88738676de618c1 100644 (file)
@@ -1106,6 +1106,16 @@ static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
                kfd_process_device_free_bos(p->pdds[i]);
 }
 
+/*
+ * If @p is the process recorded as profiler owner of the device behind
+ * @pdd, switch perfcount off through the queue manager and clear the
+ * ownership record. Does nothing when another process (or nobody) owns it.
+ *
+ * NOTE(review): set_perfcount takes the dqm lock while profiler_lock is
+ * held here - confirm no path takes these two locks in the opposite order.
+ */
+static void kfd_process_profiler_release(struct kfd_process *p, struct kfd_process_device *pdd)
+{
+       struct kfd_dev *kfd = pdd->dev->kfd;
+       struct device_queue_manager *dqm;
+
+       mutex_lock(&kfd->profiler_lock);
+       if (kfd->profiler_process == p) {
+               dqm = pdd->qpd.dqm;
+               dqm->ops.set_perfcount(dqm, 0);
+               kfd->profiler_process = NULL;
+       }
+       mutex_unlock(&kfd->profiler_lock);
+}
+
 static void kfd_process_destroy_pdds(struct kfd_process *p)
 {
        int i;
@@ -1117,6 +1127,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 
                pr_debug("Releasing pdd (topology id %d, for pid %d)\n",
                        pdd->dev->id, p->lead_thread->pid);
+               kfd_process_profiler_release(p, pdd);
                kfd_process_device_destroy_cwsr_dgpu(pdd);
                kfd_process_device_destroy_ib_mem(pdd);
 
index e72359370857c2408ee5885706c2e206d3ae6faf..cc3ed0765c83e6e72d27615fc27cc0d52d45d42d 100644 (file)
@@ -1558,6 +1558,29 @@ struct kfd_ioctl_dbg_trap_args {
        };
 };
 
+/* Profiler interface version reported by the KFD_IOC_PROFILER_VERSION op */
+#define KFD_IOC_PROFILER_VERSION_NUM 1
+/* Operations selectable through kfd_ioctl_profiler_args.op (1 is not assigned here) */
+enum kfd_profiler_ops {
+       KFD_IOC_PROFILER_PMC = 0,
+       KFD_IOC_PROFILER_VERSION = 2,
+};
+
+/**
+ * struct kfd_ioctl_pmc_settings - Enables/Disables GPU Specific profiler settings
+ * @gpu_id: the user_gpu_id of the device to operate on
+ * @lock: 1 to lock the GPU for profiling, 0 to release it
+ * @perfcount_enable: 1 to force perfcount enable for queues on the GPU,
+ *                    any other value to disable it
+ */
+struct kfd_ioctl_pmc_settings {
+       __u32 gpu_id;             /* This is the user_gpu_id */
+       __u32 lock;               /* Lock GPU for Profiling */
+       __u32 perfcount_enable;   /* Force Perfcount Enable for queues on GPU */
+};
+
+/* Argument of AMDKFD_IOC_PROFILER; op selects which union member is used */
+struct kfd_ioctl_profiler_args {
+       __u32 op;                                               /* enum kfd_profiler_ops */
+       union {
+               struct kfd_ioctl_pmc_settings  pmc;     /* in: used by KFD_IOC_PROFILER_PMC */
+               __u32 version;                          /* out: KFD_IOC_PROFILER_VERSION_NUM */
+       };
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)                  _IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)           _IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -1681,7 +1704,10 @@ struct kfd_ioctl_dbg_trap_args {
 #define AMDKFD_IOC_CREATE_PROCESS              \
                AMDKFD_IO(0x27)
 
+#define AMDKFD_IOC_PROFILER                    \
+               AMDKFD_IOWR(0x28, struct kfd_ioctl_profiler_args)
+
 #define AMDKFD_COMMAND_START           0x01
-#define AMDKFD_COMMAND_END             0x28
+#define AMDKFD_COMMAND_END             0x29
 
 #endif