git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdkfd: Fix SMI event PID reporting for containers
author: Andrew Martin <andrew.martin@amd.com>
Thu, 28 May 2026 14:32:52 +0000 (10:32 -0400)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 17 Jun 2026 22:12:00 +0000 (18:12 -0400)
SMI events were reporting incorrect PIDs in containerized environments,
causing test failures where container processes expected to see their
namespace-local PIDs but instead received global host PIDs.

The issue had two root causes:

1. Event functions were called from kernel context (page fault handlers,
   migration workers) where 'current' refers to the kernel worker thread,
   not the userspace GPU process that triggered the event.

2. PID conversion used task_tgid_vnr() which returns the PID in the
   caller's namespace (init namespace for kernel threads), not the task's
   own namespace.

This patch updates the SMI event interface:

- Change 8 event function signatures to accept task_struct pointer
  instead of pid_t, allowing proper namespace-aware PID conversion

- Convert PIDs using task_tgid_nr_ns(task, task_active_pid_ns(task))
  which returns the PID as the process sees it via getpid()

- Update 10 call sites to pass p->lead_thread (the GPU process)
  instead of p->lead_thread->pid or current (kernel worker)

This ensures SMI events report container-local PIDs, which is critical
for containerized GPU workloads to correctly correlate events with their
processes.

Tested-by: Andrew Martin <andmarti@amd.com>
Assisted-by: Claude:Sonnet 4-5
Signed-off-by: Andrew Martin <andrew.martin@amd.com>
Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 60271ec06e04ba5d69d68714f3abdf637d86c257)

drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
drivers/gpu/drm/amd/amdkfd/kfd_svm.c

index 28dc6886c1ff101512473f13b29c9e834d1bbf6a..226e76ae0be773896cf6093ebd39a4eae1836b23 100644 (file)
@@ -424,7 +424,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
        migrate.dst = migrate.src + npages;
        scratch = (dma_addr_t *)(migrate.dst + npages);
 
-       kfd_smi_event_migration_start(node, p->lead_thread->pid,
+       kfd_smi_event_migration_start(node, p->lead_thread,
                                      start >> PAGE_SHIFT, end >> PAGE_SHIFT,
                                      0, node->id, prange->prefetch_loc,
                                      prange->preferred_loc, trigger);
@@ -462,7 +462,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 
 out_free:
        kvfree(buf);
-       kfd_smi_event_migration_end(node, p->lead_thread->pid,
+       kfd_smi_event_migration_end(node, p->lead_thread,
                                    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
                                    0, node->id, trigger, r);
 out:
@@ -727,7 +727,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
        migrate.fault_page = fault_page;
        scratch = (dma_addr_t *)(migrate.dst + npages);
 
-       kfd_smi_event_migration_start(node, p->lead_thread->pid,
+       kfd_smi_event_migration_start(node, p->lead_thread,
                                      start >> PAGE_SHIFT, end >> PAGE_SHIFT,
                                      node->id, 0, prange->prefetch_loc,
                                      prange->preferred_loc, trigger);
@@ -766,7 +766,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
 
 out_free:
        kvfree(buf);
-       kfd_smi_event_migration_end(node, p->lead_thread->pid,
+       kfd_smi_event_migration_end(node, p->lead_thread,
                                    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
                                    node->id, 0, trigger, r);
 out:
index 368283d53077ecfb087c0621380b91c2caf10b2f..63815be995fc8e99f399fc5471641b0cfbc2ed90 100644 (file)
@@ -1969,7 +1969,7 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
                struct kfd_process_device *pdd = p->pdds[i];
                struct device *dev = pdd->dev->adev->dev;
 
-               kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
+               kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread,
                                             trigger);
 
                r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
@@ -1999,7 +1999,7 @@ fail:
                if (n_evicted == 0)
                        break;
 
-               kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
+               kfd_smi_event_queue_restore(pdd->dev, p->lead_thread);
 
                if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
                                                              &pdd->qpd))
@@ -2022,7 +2022,7 @@ int kfd_process_restore_queues(struct kfd_process *p)
                struct kfd_process_device *pdd = p->pdds[i];
                struct device *dev = pdd->dev->adev->dev;
 
-               kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
+               kfd_smi_event_queue_restore(pdd->dev, p->lead_thread);
 
                r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
                                                              &pdd->qpd);
index dfbde5a571f6ae05c4c3c589a4e4646d9994d37b..e659cd50eb0bd628baecff7c066609aeb083cbd1 100644 (file)
@@ -195,17 +195,35 @@ static void add_event_to_kfifo(pid_t pid, struct kfd_node *dev,
        rcu_read_unlock();
 }
 
+/**
+ * kfd_smi_task_to_pid - Convert task to namespace-aware PID
+ * @task: task_struct pointer (typically p->lead_thread)
+ *
+ * Returns the PID as it appears in the task's own PID namespace.
+ * For containerized processes, this returns the container-local PID
+ * (what getpid() returns), not the global host PID.
+ *
+ * Returns 0 if task is NULL.
+ */
+static inline pid_t kfd_smi_task_to_pid(struct task_struct *task)
+{
+       return task ? task_tgid_nr_ns(task, task_active_pid_ns(task)) : 0;
+}
+
 __printf(4, 5)
-static void kfd_smi_event_add(pid_t pid, struct kfd_node *dev,
+static void kfd_smi_event_add(struct task_struct *task, struct kfd_node *dev,
                              unsigned int event, char *fmt, ...)
 {
        char fifo_in[KFD_SMI_EVENT_MSG_SIZE];
        int len;
        va_list args;
+       pid_t pid;
 
        if (list_empty(&dev->smi_clients))
                return;
 
+       pid = kfd_smi_task_to_pid(task);
+
        len = snprintf(fifo_in, sizeof(fifo_in), "%x ", event);
 
        va_start(args, fmt);
@@ -234,14 +252,15 @@ void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset,
                amdgpu_reset_get_desc(reset_context, reset_cause,
                                      sizeof(reset_cause));
 
-       kfd_smi_event_add(0, dev, event, KFD_EVENT_FMT_UPDATE_GPU_RESET(
+       kfd_smi_event_add(NULL, dev, event, KFD_EVENT_FMT_UPDATE_GPU_RESET(
                          dev->reset_seq_num, reset_cause));
 }
 
 void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
                                             uint64_t throttle_bitmask)
 {
-       kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, KFD_EVENT_FMT_THERMAL_THROTTLING(
+       kfd_smi_event_add(NULL, dev, KFD_SMI_EVENT_THERMAL_THROTTLE,
+                         KFD_EVENT_FMT_THERMAL_THROTTLING(
                          throttle_bitmask,
                          amdgpu_dpm_get_thermal_throttling_counter(dev->adev)));
 }
@@ -254,67 +273,67 @@ void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
        if (task_info) {
                /* Report VM faults from user applications, not retry from kernel */
                if (task_info->task.pid)
-                       kfd_smi_event_add(task_info->tgid, dev,
-                                         KFD_SMI_EVENT_VMFAULT,
-                                         KFD_EVENT_FMT_VMFAULT(task_info->task.pid,
-                                                               task_info->task.comm));
+                       kfd_smi_event_add(NULL, dev, KFD_SMI_EVENT_VMFAULT, KFD_EVENT_FMT_VMFAULT(
+                                         task_info->task.pid, task_info->task.comm));
                amdgpu_vm_put_task_info(task_info);
        }
 }
 
-void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_page_fault_start(struct kfd_node *node, struct task_struct *task,
                                    unsigned long address, bool write_fault,
                                    ktime_t ts)
 {
-       kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_START,
-                         KFD_EVENT_FMT_PAGEFAULT_START(ktime_to_ns(ts), pid,
-                         address, node->id, write_fault ? 'W' : 'R'));
+       kfd_smi_event_add(task, node, KFD_SMI_EVENT_PAGE_FAULT_START,
+                         KFD_EVENT_FMT_PAGEFAULT_START(ktime_to_ns(ts),
+                         kfd_smi_task_to_pid(task), address, node->id,
+                         write_fault ? 'W' : 'R'));
 }
 
-void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_page_fault_end(struct kfd_node *node, struct task_struct *task,
                                  unsigned long address, bool migration)
 {
-       kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_END,
+       kfd_smi_event_add(task, node, KFD_SMI_EVENT_PAGE_FAULT_END,
                          KFD_EVENT_FMT_PAGEFAULT_END(ktime_get_boottime_ns(),
-                         pid, address, node->id, migration ? 'M' : 'U'));
+                         kfd_smi_task_to_pid(task), address, node->id,
+                         migration ? 'M' : 'U'));
 }
 
-void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_migration_start(struct kfd_node *node, struct task_struct *task,
                                   unsigned long start, unsigned long end,
                                   uint32_t from, uint32_t to,
                                   uint32_t prefetch_loc, uint32_t preferred_loc,
                                   uint32_t trigger)
 {
-       kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_START,
-                         KFD_EVENT_FMT_MIGRATE_START(
-                         ktime_get_boottime_ns(), pid, start, end - start,
-                         from, to, prefetch_loc, preferred_loc, trigger));
+       kfd_smi_event_add(task, node, KFD_SMI_EVENT_MIGRATE_START,
+                         KFD_EVENT_FMT_MIGRATE_START(ktime_get_boottime_ns(),
+                         kfd_smi_task_to_pid(task), start, end - start, from,
+                         to, prefetch_loc, preferred_loc, trigger));
 }
 
-void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_migration_end(struct kfd_node *node, struct task_struct *task,
                                 unsigned long start, unsigned long end,
                                 uint32_t from, uint32_t to, uint32_t trigger,
                                 int error_code)
 {
-       kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_END,
-                         KFD_EVENT_FMT_MIGRATE_END(
-                         ktime_get_boottime_ns(), pid, start, end - start,
-                         from, to, trigger, error_code));
+       kfd_smi_event_add(task, node, KFD_SMI_EVENT_MIGRATE_END,
+                         KFD_EVENT_FMT_MIGRATE_END(ktime_get_boottime_ns(),
+                         kfd_smi_task_to_pid(task), start, end - start, from,
+                         to, trigger, error_code));
 }
 
-void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_queue_eviction(struct kfd_node *node, struct task_struct *task,
                                  uint32_t trigger)
 {
-       kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_EVICTION,
-                         KFD_EVENT_FMT_QUEUE_EVICTION(ktime_get_boottime_ns(), pid,
-                         node->id, trigger));
+       kfd_smi_event_add(task, node, KFD_SMI_EVENT_QUEUE_EVICTION,
+                         KFD_EVENT_FMT_QUEUE_EVICTION(ktime_get_boottime_ns(),
+                         kfd_smi_task_to_pid(task), node->id, trigger));
 }
 
-void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid)
+void kfd_smi_event_queue_restore(struct kfd_node *node, struct task_struct *task)
 {
-       kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_RESTORE,
-                         KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), pid,
-                         node->id, '0'));
+       kfd_smi_event_add(task, node, KFD_SMI_EVENT_QUEUE_RESTORE,
+                         KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(),
+                         kfd_smi_task_to_pid(task), node->id, '0'));
 }
 
 void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
@@ -329,21 +348,23 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
        for (i = 0; i < p->n_pdds; i++) {
                struct kfd_process_device *pdd = p->pdds[i];
 
-               kfd_smi_event_add(p->lead_thread->pid, pdd->dev,
+               kfd_smi_event_add(p->lead_thread, pdd->dev,
                                  KFD_SMI_EVENT_QUEUE_RESTORE,
                                  KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(),
-                                 p->lead_thread->pid, pdd->dev->id, 'R'));
+                                 kfd_smi_task_to_pid(p->lead_thread),
+                                 pdd->dev->id, 'R'));
        }
        kfd_unref_process(p);
 }
 
-void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, struct task_struct *task,
                                  unsigned long address, unsigned long last,
                                  uint32_t trigger)
 {
-       kfd_smi_event_add(pid, node, KFD_SMI_EVENT_UNMAP_FROM_GPU,
+       kfd_smi_event_add(task, node, KFD_SMI_EVENT_UNMAP_FROM_GPU,
                          KFD_EVENT_FMT_UNMAP_FROM_GPU(ktime_get_boottime_ns(),
-                         pid, address, last - address + 1, node->id, trigger));
+                         kfd_smi_task_to_pid(task), address,
+                         last - address + 1, node->id, trigger));
 }
 
 void kfd_smi_event_process(struct kfd_process_device *pdd, bool start)
@@ -358,7 +379,7 @@ void kfd_smi_event_process(struct kfd_process_device *pdd, bool start)
        task_info = amdgpu_vm_get_task_info_vm(avm);
 
        if (task_info) {
-               kfd_smi_event_add(task_info->tgid, pdd->dev,
+               kfd_smi_event_add(NULL, pdd->dev,
                                  start ? KFD_SMI_EVENT_PROCESS_START :
                                  KFD_SMI_EVENT_PROCESS_END,
                                  KFD_EVENT_FMT_PROCESS(task_info->task.pid,
@@ -387,7 +408,7 @@ int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd)
        spin_lock_init(&client->lock);
        client->events = 0;
        client->dev = dev;
-       client->pid = current->tgid;
+       client->pid = kfd_smi_task_to_pid(current);
        client->suser = capable(CAP_SYS_ADMIN);
 
        spin_lock(&dev->smi_lock);
index bb4d72b57387c913c509e14c597cbc888d56a497..afa93d7cfa7f345cf0cacb76f3c95ea22b1b0da6 100644 (file)
@@ -32,25 +32,25 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
                                             uint64_t throttle_bitmask);
 void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset,
                                    struct amdgpu_reset_context *reset_context);
-void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_page_fault_start(struct kfd_node *node, struct task_struct *task,
                                    unsigned long address, bool write_fault,
                                    ktime_t ts);
-void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_page_fault_end(struct kfd_node *node, struct task_struct *task,
                                  unsigned long address, bool migration);
-void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_migration_start(struct kfd_node *node, struct task_struct *task,
                             unsigned long start, unsigned long end,
                             uint32_t from, uint32_t to,
                             uint32_t prefetch_loc, uint32_t preferred_loc,
                             uint32_t trigger);
-void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_migration_end(struct kfd_node *node, struct task_struct *task,
                             unsigned long start, unsigned long end,
                             uint32_t from, uint32_t to, uint32_t trigger,
                             int error_code);
-void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_queue_eviction(struct kfd_node *node, struct task_struct *task,
                                  uint32_t trigger);
-void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid);
+void kfd_smi_event_queue_restore(struct kfd_node *node, struct task_struct *task);
 void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm);
-void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
+void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, struct task_struct *task,
                                  unsigned long address, unsigned long last,
                                  uint32_t trigger);
 void kfd_smi_event_process(struct kfd_process_device *pdd, bool start);
index 3841943da5ece6c2bd060ff2f0e05429e64f346f..d64d104783d403da33afbbe9acf44c813a016d86 100644 (file)
@@ -1408,7 +1408,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
                        return -EINVAL;
                }
 
-               kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid,
+               kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread,
                                             start, last, trigger);
 
                r = svm_range_unmap_from_gpu(pdd->dev->adev,
@@ -3205,7 +3205,7 @@ retry_write_locked:
                 svms, prange->start, prange->last, best_loc,
                 prange->actual_loc);
 
-       kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
+       kfd_smi_event_page_fault_start(node, p->lead_thread, addr,
                                       write_fault, timestamp);
 
        /* Align migration range start and size to granularity size */
@@ -3248,7 +3248,7 @@ retry_write_locked:
                         r, svms, start, last);
 
 out_migrate_fail:
-       kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
+       kfd_smi_event_page_fault_end(node, p->lead_thread, addr,
                                     migration);
 
 out_unlock_range: