From: Geoffrey McRae Date: Mon, 1 Jun 2026 13:55:53 +0000 (+1000) Subject: drm/amdkfd: Fix NULL deref during sysfs teardown X-Git-Tag: v7.2-rc1~10^2~1^2~15 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d072a3f603c639ee12a05126aa0bab0ff1732323;p=thirdparty%2Fkernel%2Flinux.git drm/amdkfd: Fix NULL deref during sysfs teardown Move kfd_process_remove_sysfs() earlier in kfd_process_wq_release() so that all sysfs/procfs entries are removed before tearing down PDDs and dropping lead_thread. The per-process sysfs attributes are backed by struct kfd_process_device, and their show/store callbacks dereference PDD fields. Since sysfs removal waits for active callbacks to complete, removing these entries first closes a race where userspace reads sdma_* and stats_* files after PDD teardown. Previously this cleanup ran after kfd_process_destroy_pdds(), which resets p->n_pdds to 0. This meant kfd_process_remove_sysfs() could no longer walk the PDD array, so the per-PDD sysfs cleanup did not run as intended. This race caused NULL pointer dereferences observed in kfd_sdma_activity_worker and kfd_procfs_stats_show. Also harden kfd_process_remove_sysfs() against partially initialized or already-freed objects: - Check kobj_queues before removing PASID and deleting it - Guard kobj_stats and kobj_counters before use These checks prevent invalid dereferences during cleanup. Cc: Felix Kuehling Cc: Alex Deucher Signed-off-by: Geoffrey McRae Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 674c692702341fed321720b4b92036c5934fb485) --- diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 63815be995fc8..ca71fa726e32b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1175,10 +1175,12 @@ static void kfd_process_remove_sysfs(struct kfd_process *p) if (!p->kobj) return; - sysfs_remove_file(p->kobj, &p->attr_pasid); - kobject_del(p->kobj_queues); - kobject_put(p->kobj_queues); - p->kobj_queues = NULL; + if (p->kobj_queues) { + sysfs_remove_file(p->kobj, &p->attr_pasid); + kobject_del(p->kobj_queues); + kobject_put(p->kobj_queues); + p->kobj_queues = NULL; + } for (i = 0; i < p->n_pdds; i++) { pdd = p->pdds[i]; @@ -1186,17 +1188,21 @@ static void kfd_process_remove_sysfs(struct kfd_process *p) sysfs_remove_file(p->kobj, &pdd->attr_vram); sysfs_remove_file(p->kobj, &pdd->attr_sdma); - sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict); - if (pdd->dev->kfd2kgd->get_cu_occupancy) - sysfs_remove_file(pdd->kobj_stats, - &pdd->attr_cu_occupancy); - kobject_del(pdd->kobj_stats); - kobject_put(pdd->kobj_stats); - pdd->kobj_stats = NULL; + if (pdd->kobj_stats) { + sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict); + if (pdd->dev->kfd2kgd->get_cu_occupancy) + sysfs_remove_file(pdd->kobj_stats, + &pdd->attr_cu_occupancy); + kobject_del(pdd->kobj_stats); + kobject_put(pdd->kobj_stats); + pdd->kobj_stats = NULL; + } } for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) { pdd = p->pdds[i]; + if (!pdd->kobj_counters) + continue; sysfs_remove_file(pdd->kobj_counters, &pdd->attr_faults); sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_in); @@ -1254,6 +1260,13 @@ static void kfd_process_wq_release(struct work_struct *work) kfd_debugfs_remove_process(p); + /* + * Remove the proc/sysfs entries before destroying PDDs. The removal path + * walks the PDD array and sysfs callbacks dereference PDD fields, so the + * backing data must remain valid until sysfs removal has completed. + */ + kfd_process_remove_sysfs(p); + kfd_process_kunmap_signal_bo(p); kfd_process_free_outstanding_kfd_bos(p); svm_range_list_fini(p); @@ -1267,11 +1280,6 @@ static void kfd_process_wq_release(struct work_struct *work) put_task_struct(p->lead_thread); - /* the last step is removing process entries under /sys - * to indicate the process has been terminated. - */ - kfd_process_remove_sysfs(p); - kfree(p); }