From: Perry Yuan Date: Thu, 26 Feb 2026 09:50:33 +0000 (+0800) Subject: drm/amdgpu: Move KFD sched stop/start into PTL control path X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1da21802df3a98d54159bba1d1b04d59f030b92a;p=thirdparty%2Fkernel%2Flinux.git drm/amdgpu: Move KFD sched stop/start into PTL control path Move amdgpu_amdkfd_stop/start_sched calls from kfd_ptl_control() into amdgpu_ptl_perf_monitor_ctrl() so all PTL callers (KFD ioctl, sysfs, GFX init) get consistent scheduling management. Add amdgpu_amdkfd_stop/start_sched_all() wrappers to stop and restart KFD scheduling on all nodes without assuming node ID ordering. v3: * only stop/start KFD scheduling for PSP_PTL_PERF_MON_SET requests v2: * move the stop/start sched calls into amdgpu_ptl_perf_monitor_ctrl() (Lijo) * add wrappers amdgpu_amdkfd_stop_sched_all() and amdgpu_amdkfd_start_sched_all() (Lijo) Signed-off-by: Perry Yuan Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4fee011c2e261..2bf6a31c194da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -436,8 +436,10 @@ int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd); void kgd2kfd_unlock_kfd(struct kfd_dev *kfd); int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd); +int amdgpu_amdkfd_start_sched_all(struct amdgpu_device *adev); int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd); +int amdgpu_amdkfd_stop_sched_all(struct amdgpu_device *adev); bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id); bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry, bool retry_fault); @@ -534,6 +536,11 @@ static inline int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd) return 0; } +static inline int 
amdgpu_amdkfd_start_sched_all(struct amdgpu_device *adev) +{ + return 0; +} + static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) { return 0; @@ -544,6 +551,11 @@ static inline int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd) return 0; } +static inline int amdgpu_amdkfd_stop_sched_all(struct amdgpu_device *adev) +{ + return 0; +} + static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) { return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 5f8f0026483b2..a1db694063b1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1358,17 +1358,23 @@ int amdgpu_ptl_perf_monitor_ctrl(struct amdgpu_device *adev, u32 req_code, } } - /* Wait for GFX engine idle before PTL state transition */ if (req_code == PSP_PTL_PERF_MON_SET) { + amdgpu_amdkfd_stop_sched_all(adev); + /* Wait for GFX engine idle before PTL state transition */ ret = amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX); if (ret) { + amdgpu_amdkfd_start_sched_all(adev); dev_err(adev->dev, "GFX not idle before PTL operation (%d)\n", ret); return ret; } + ret = psp_ptl_invoke(psp, req_code, ptl_state, &ptl_fmt1, &ptl_fmt2); + amdgpu_amdkfd_start_sched_all(adev); + } else { + ret = psp_ptl_invoke(psp, req_code, ptl_state, &ptl_fmt1, &ptl_fmt2); } - return psp_ptl_invoke(psp, req_code, ptl_state, &ptl_fmt1, &ptl_fmt2); + return ret; } static enum amdgpu_ptl_fmt str_to_ptl_fmt(const char *str) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 5bea583732cbb..04ae3cb3a65ca 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1789,17 +1789,11 @@ static int kfd_ptl_control(struct kfd_process_device *pdd, bool enable) if (!pdd->dev->kfd2kgd || !pdd->dev->kfd2kgd->ptl_ctrl) return -EOPNOTSUPP; - if (adev->kfd.init_complete) - 
amdgpu_amdkfd_stop_sched(adev, pdd->dev->node_id); - ret = pdd->dev->kfd2kgd->ptl_ctrl(adev, PSP_PTL_PERF_MON_SET, &ptl_state, &pref_format1, &pref_format2); - if (adev->kfd.init_complete) - amdgpu_amdkfd_start_sched(adev, pdd->dev->node_id); - return ret; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index d649d8603e28e..c2c59781feee9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1651,6 +1651,22 @@ int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd) return 0; } +int amdgpu_amdkfd_stop_sched_all(struct amdgpu_device *adev) +{ + if (!adev->kfd.init_complete) + return 0; + + return kgd2kfd_stop_sched_all_nodes(adev->kfd.dev); +} + +int amdgpu_amdkfd_start_sched_all(struct amdgpu_device *adev) +{ + if (!adev->kfd.init_complete) + return 0; + + return kgd2kfd_start_sched_all_nodes(adev->kfd.dev); +} + bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) { struct kfd_node *node;