]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdkfd: Fix NULL deref during sysfs teardown
authorGeoffrey McRae <geoffrey.mcrae@amd.com>
Mon, 1 Jun 2026 13:55:53 +0000 (23:55 +1000)
committerAlex Deucher <alexander.deucher@amd.com>
Wed, 17 Jun 2026 22:21:27 +0000 (18:21 -0400)
Move kfd_process_remove_sysfs() earlier in kfd_process_wq_release() so
that all sysfs/procfs entries are removed before tearing down PDDs and
dropping lead_thread. The per-process sysfs attributes are backed by
struct kfd_process_device, and their show/store callbacks dereference
PDD fields. Since sysfs removal waits for active callbacks to complete,
removing these entries first closes a race where userspace reads sdma_*
and stats_* files after PDD teardown.

Previously this cleanup ran after kfd_process_destroy_pdds(), which
resets p->n_pdds to 0. This meant kfd_process_remove_sysfs() could no
longer walk the PDD array, so the per-PDD sysfs cleanup did not run as
intended.

This race caused NULL pointer dereferences observed in
kfd_sdma_activity_worker and kfd_procfs_stats_show.

Also harden kfd_process_remove_sysfs() against partially
initialized or already-freed objects:
- Check kobj_queues before removing PASID and deleting it
- Guard kobj_stats and kobj_counters before use

These checks prevent invalid dereferences during cleanup.

Cc: Felix Kuehling <Felix.Kuehling@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Geoffrey McRae <geoffrey.mcrae@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 674c692702341fed321720b4b92036c5934fb485)

drivers/gpu/drm/amd/amdkfd/kfd_process.c

index 63815be995fc8e99f399fc5471641b0cfbc2ed90..ca71fa726e32b0f772f819e93141a8d9fda4da5c 100644 (file)
@@ -1175,10 +1175,12 @@ static void kfd_process_remove_sysfs(struct kfd_process *p)
        if (!p->kobj)
                return;
 
-       sysfs_remove_file(p->kobj, &p->attr_pasid);
-       kobject_del(p->kobj_queues);
-       kobject_put(p->kobj_queues);
-       p->kobj_queues = NULL;
+       if (p->kobj_queues) {
+               sysfs_remove_file(p->kobj, &p->attr_pasid);
+               kobject_del(p->kobj_queues);
+               kobject_put(p->kobj_queues);
+               p->kobj_queues = NULL;
+       }
 
        for (i = 0; i < p->n_pdds; i++) {
                pdd = p->pdds[i];
@@ -1186,17 +1188,21 @@ static void kfd_process_remove_sysfs(struct kfd_process *p)
                sysfs_remove_file(p->kobj, &pdd->attr_vram);
                sysfs_remove_file(p->kobj, &pdd->attr_sdma);
 
-               sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict);
-               if (pdd->dev->kfd2kgd->get_cu_occupancy)
-                       sysfs_remove_file(pdd->kobj_stats,
-                                         &pdd->attr_cu_occupancy);
-               kobject_del(pdd->kobj_stats);
-               kobject_put(pdd->kobj_stats);
-               pdd->kobj_stats = NULL;
+               if (pdd->kobj_stats) {
+                       sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict);
+                       if (pdd->dev->kfd2kgd->get_cu_occupancy)
+                               sysfs_remove_file(pdd->kobj_stats,
+                                                 &pdd->attr_cu_occupancy);
+                       kobject_del(pdd->kobj_stats);
+                       kobject_put(pdd->kobj_stats);
+                       pdd->kobj_stats = NULL;
+               }
        }
 
        for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) {
                pdd = p->pdds[i];
+               if (!pdd->kobj_counters)
+                       continue;
 
                sysfs_remove_file(pdd->kobj_counters, &pdd->attr_faults);
                sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_in);
@@ -1254,6 +1260,13 @@ static void kfd_process_wq_release(struct work_struct *work)
 
        kfd_debugfs_remove_process(p);
 
+       /*
+        * Remove the proc/sysfs entries before destroying PDDs. The removal path
+        * walks the PDD array and sysfs callbacks dereference PDD fields, so the
+        * backing data must remain valid until sysfs removal has completed.
+        */
+       kfd_process_remove_sysfs(p);
+
        kfd_process_kunmap_signal_bo(p);
        kfd_process_free_outstanding_kfd_bos(p);
        svm_range_list_fini(p);
@@ -1267,11 +1280,6 @@ static void kfd_process_wq_release(struct work_struct *work)
 
        put_task_struct(p->lead_thread);
 
-       /* the last step is removing process entries under /sys
-        * to indicate the process has been terminated.
-        */
-       kfd_process_remove_sysfs(p);
-
        kfree(p);
 }