From 80350d1d0bbeaee3007083ac666d42e17cc97a35 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 10 Dec 2021 14:33:35 +0100 Subject: [PATCH] 5.10-stable patches added patches: drm-amd-amdkfd-adjust-dummy-functions-placement.patch drm-amdgpu-add-amdgpu_amdkfd_resume_iommu.patch drm-amdgpu-init-iommu-after-amdkfd-device-init.patch drm-amdgpu-move-iommu_resume-before-ip-init-resume.patch drm-amdkfd-fix-boot-failure-when-iommu-is-disabled-in-picasso.patch drm-amdkfd-separate-kfd_iommu_resume-from-kfd_resume.patch --- ...kfd-adjust-dummy-functions-placement.patch | 320 ++++++++++++++++++ ...mdgpu-add-amdgpu_amdkfd_resume_iommu.patch | 51 +++ ...-init-iommu-after-amdkfd-device-init.patch | 54 +++ ...e-iommu_resume-before-ip-init-resume.patch | 57 ++++ ...re-when-iommu-is-disabled-in-picasso.patch | 48 +++ ...ate-kfd_iommu_resume-from-kfd_resume.patch | 75 ++++ queue-5.10/series | 6 + 7 files changed, 611 insertions(+) create mode 100644 queue-5.10/drm-amd-amdkfd-adjust-dummy-functions-placement.patch create mode 100644 queue-5.10/drm-amdgpu-add-amdgpu_amdkfd_resume_iommu.patch create mode 100644 queue-5.10/drm-amdgpu-init-iommu-after-amdkfd-device-init.patch create mode 100644 queue-5.10/drm-amdgpu-move-iommu_resume-before-ip-init-resume.patch create mode 100644 queue-5.10/drm-amdkfd-fix-boot-failure-when-iommu-is-disabled-in-picasso.patch create mode 100644 queue-5.10/drm-amdkfd-separate-kfd_iommu_resume-from-kfd_resume.patch diff --git a/queue-5.10/drm-amd-amdkfd-adjust-dummy-functions-placement.patch b/queue-5.10/drm-amd-amdkfd-adjust-dummy-functions-placement.patch new file mode 100644 index 00000000000..471a1bb1827 --- /dev/null +++ b/queue-5.10/drm-amd-amdkfd-adjust-dummy-functions-placement.patch @@ -0,0 +1,320 @@ +From cd63989e0e6aa2eb66b461f2bae769e2550e47ac Mon Sep 17 00:00:00 2001 +From: Lang Yu +Date: Thu, 28 Jan 2021 10:27:03 +0800 +Subject: drm/amd/amdkfd: adjust dummy functions' placement + +From: Lang Yu + +commit cd63989e0e6aa2eb66b461f2bae769e2550e47ac upstream. + +Move all the dummy functions in amdgpu_amdkfd.c to +amdgpu_amdkfd.h as inline functions. + +Signed-off-by: Lang Yu +Suggested-by: Felix Kuehling +Reviewed-by: Felix Kuehling +Reviewed-by: Huang Rui +Signed-off-by: Alex Deucher +Signed-off-by: James Zhu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 87 ------------------ + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 138 +++++++++++++++++++++++++---- + 2 files changed, 119 insertions(+), 106 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +@@ -47,12 +47,8 @@ int amdgpu_amdkfd_init(void) + amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh; + amdgpu_amdkfd_total_mem_size *= si.mem_unit; + +-#ifdef CONFIG_HSA_AMD + ret = kgd2kfd_init(); + amdgpu_amdkfd_gpuvm_init_mem_limits(); +-#else +- ret = -ENOENT; +-#endif + kfd_initialized = !ret; + + return ret; +@@ -695,86 +691,3 @@ bool amdgpu_amdkfd_have_atomics_support( + + return adev->have_atomics_support; + } +- +-#ifndef CONFIG_HSA_AMD +-bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) +-{ +- return false; +-} +- +-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) +-{ +-} +- +-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) +-{ +- return 0; +-} +- +-void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, +- struct amdgpu_vm *vm) +-{ +-} +- +-struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) +-{ +- return NULL; +-} +- +-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) +-{ +- return 0; +-} +- +-struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, +- unsigned int asic_type, bool vf) +-{ +- return NULL; +-} +- +-bool kgd2kfd_device_init(struct kfd_dev *kfd, +- struct drm_device *ddev, +- const struct kgd2kfd_shared_resources *gpu_resources) +-{ +- return false; +-} +- +-void kgd2kfd_device_exit(struct kfd_dev *kfd) +-{ +-} +- +-void kgd2kfd_exit(void) +-{ +-} +- +-void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) +-{ +-} +- +-int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) +-{ +- return 0; +-} +- +-int kgd2kfd_pre_reset(struct kfd_dev *kfd) +-{ +- return 0; +-} +- +-int kgd2kfd_post_reset(struct kfd_dev *kfd) +-{ +- return 0; +-} +- +-void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) +-{ +-} +- +-void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) +-{ +-} +- +-void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) +-{ +-} +-#endif +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +@@ -94,11 +94,6 @@ enum kgd_engine_type { + KGD_ENGINE_MAX + }; + +-struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, +- struct mm_struct *mm); +-bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); +-struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); +-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); + + struct amdkfd_process_info { + /* List head of all VMs that belong to a KFD process */ +@@ -132,8 +127,6 @@ void amdgpu_amdkfd_interrupt(struct amdg + void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); + void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); + void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); +- +-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); + int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, + uint32_t vmid, uint64_t gpu_addr, + uint32_t *ib_cmd, uint32_t ib_len); +@@ -153,6 +146,38 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_ + int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, + int queue_bit); + ++struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, ++ struct mm_struct *mm); ++#if IS_ENABLED(CONFIG_HSA_AMD) ++bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); ++struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); ++int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); ++int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); ++#else ++static inline ++bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) ++{ ++ return false; ++} ++ ++static inline ++struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) ++{ ++ return NULL; ++} ++ ++static inline ++int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) ++{ ++ return 0; ++} ++ ++static inline ++int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) ++{ ++ return 0; ++} ++#endif + /* Shared API */ + int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, + void **mem_obj, uint64_t *gpu_addr, +@@ -215,8 +240,6 @@ int amdgpu_amdkfd_gpuvm_acquire_process_ + struct file *filp, u32 pasid, + void **vm, void **process_info, + struct dma_fence **ef); +-void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, +- struct amdgpu_vm *vm); + void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); + void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm); + uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); +@@ -236,23 +259,43 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_ke + struct kgd_mem *mem, void **kptr, uint64_t *size); + int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, + struct dma_fence **ef); +- + int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, + struct kfd_vm_fault_info *info); +- + int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, + struct dma_buf *dmabuf, + uint64_t va, void *vm, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset); +- +-void amdgpu_amdkfd_gpuvm_init_mem_limits(void); +-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); +- + int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, + struct tile_config *config); ++#if IS_ENABLED(CONFIG_HSA_AMD) ++void amdgpu_amdkfd_gpuvm_init_mem_limits(void); ++void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, ++ struct amdgpu_vm *vm); ++void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); ++#else ++static inline ++void amdgpu_amdkfd_gpuvm_init_mem_limits(void) ++{ ++} + ++static inline ++void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, ++ struct amdgpu_vm *vm) ++{ ++} ++ ++static inline ++void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) ++{ ++} ++#endif + /* KGD2KFD callbacks */ ++int kgd2kfd_quiesce_mm(struct mm_struct *mm); ++int kgd2kfd_resume_mm(struct mm_struct *mm); ++int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, ++ struct dma_fence *fence); ++#if IS_ENABLED(CONFIG_HSA_AMD) + int kgd2kfd_init(void); + void kgd2kfd_exit(void); + struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, +@@ -266,11 +309,68 @@ int kgd2kfd_resume(struct kfd_dev *kfd, + int kgd2kfd_pre_reset(struct kfd_dev *kfd); + int kgd2kfd_post_reset(struct kfd_dev *kfd); + void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); +-int kgd2kfd_quiesce_mm(struct mm_struct *mm); +-int kgd2kfd_resume_mm(struct mm_struct *mm); +-int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, +- struct dma_fence *fence); + void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); + void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask); ++#else ++static inline int kgd2kfd_init(void) ++{ ++ return -ENOENT; ++} ++ ++static inline void kgd2kfd_exit(void) ++{ ++} + ++static inline ++struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, ++ unsigned int asic_type, bool vf) ++{ ++ return NULL; ++} ++ ++static inline ++bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev, ++ const struct kgd2kfd_shared_resources *gpu_resources) ++{ ++ return false; ++} ++ ++static inline void kgd2kfd_device_exit(struct kfd_dev *kfd) ++{ ++} ++ ++static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) ++{ ++} ++ ++static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) ++{ ++ return 0; ++} ++ ++static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd) ++{ ++ return 0; ++} ++ ++static inline int kgd2kfd_post_reset(struct kfd_dev *kfd) ++{ ++ return 0; ++} ++ ++static inline ++void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) ++{ ++} ++ ++static inline ++void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) ++{ ++} ++ ++static inline ++void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) ++{ ++} ++#endif + #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/queue-5.10/drm-amdgpu-add-amdgpu_amdkfd_resume_iommu.patch b/queue-5.10/drm-amdgpu-add-amdgpu_amdkfd_resume_iommu.patch new file mode 100644 index 00000000000..e3c7936e159 --- /dev/null +++ b/queue-5.10/drm-amdgpu-add-amdgpu_amdkfd_resume_iommu.patch @@ -0,0 +1,51 @@ +From 8066008482e533e91934bee49765bf8b4a7c40db Mon Sep 17 00:00:00 2001 +From: James Zhu +Date: Tue, 7 Sep 2021 11:27:31 -0400 +Subject: drm/amdgpu: add amdgpu_amdkfd_resume_iommu + +From: James Zhu + +commit 8066008482e533e91934bee49765bf8b4a7c40db upstream. + +Add amdgpu_amdkfd_resume_iommu for amdgpu. + +Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277 +Signed-off-by: James Zhu +Reviewed-by: Felix Kuehling +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 10 ++++++++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + + 2 files changed, 11 insertions(+) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +@@ -190,6 +190,16 @@ void amdgpu_amdkfd_suspend(struct amdgpu + kgd2kfd_suspend(adev->kfd.dev, run_pm); + } + ++int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev) ++{ ++ int r = 0; ++ ++ if (adev->kfd.dev) ++ r = kgd2kfd_resume_iommu(adev->kfd.dev); ++ ++ return r; ++} ++ + int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm) + { + int r = 0; +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +@@ -121,6 +121,7 @@ int amdgpu_amdkfd_init(void); + void amdgpu_amdkfd_fini(void); + + void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm); ++int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev); + int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm); + void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, + const void *ih_ring_entry); diff --git a/queue-5.10/drm-amdgpu-init-iommu-after-amdkfd-device-init.patch b/queue-5.10/drm-amdgpu-init-iommu-after-amdkfd-device-init.patch new file mode 100644 index 00000000000..6ed8f2476dc --- /dev/null +++ b/queue-5.10/drm-amdgpu-init-iommu-after-amdkfd-device-init.patch @@ -0,0 +1,54 @@ +From 714d9e4574d54596973ee3b0624ee4a16264d700 Mon Sep 17 00:00:00 2001 +From: Yifan Zhang +Date: Tue, 28 Sep 2021 15:42:35 +0800 +Subject: drm/amdgpu: init iommu after amdkfd device init + +From: Yifan Zhang + +commit 714d9e4574d54596973ee3b0624ee4a16264d700 upstream. + +This patch is to fix clinfo failure in Raven/Picasso: + +Number of platforms: 1 + Platform Profile: FULL_PROFILE + Platform Version: OpenCL 2.2 AMD-APP (3364.0) + Platform Name: AMD Accelerated Parallel Processing + Platform Vendor: Advanced Micro Devices, Inc. + Platform Extensions: cl_khr_icd cl_amd_event_callback + + Platform Name: AMD Accelerated Parallel Processing Number of devices: 0 + +Signed-off-by: Yifan Zhang +Reviewed-by: James Zhu +Tested-by: James Zhu +Acked-by: Felix Kuehling +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -2220,10 +2220,6 @@ static int amdgpu_device_ip_init(struct + if (r) + goto init_failed; + +- r = amdgpu_amdkfd_resume_iommu(adev); +- if (r) +- goto init_failed; +- + r = amdgpu_device_ip_hw_init_phase1(adev); + if (r) + goto init_failed; +@@ -2259,6 +2255,10 @@ static int amdgpu_device_ip_init(struct + amdgpu_xgmi_add_device(adev); + amdgpu_amdkfd_device_init(adev); + ++ r = amdgpu_amdkfd_resume_iommu(adev); ++ if (r) ++ goto init_failed; ++ + amdgpu_fru_get_product_info(adev); + + init_failed: diff --git a/queue-5.10/drm-amdgpu-move-iommu_resume-before-ip-init-resume.patch b/queue-5.10/drm-amdgpu-move-iommu_resume-before-ip-init-resume.patch new file mode 100644 index 00000000000..80a74e27c03 --- /dev/null +++ b/queue-5.10/drm-amdgpu-move-iommu_resume-before-ip-init-resume.patch @@ -0,0 +1,57 @@ +From f02abeb0779700c308e661a412451b38962b8a0b Mon Sep 17 00:00:00 2001 +From: James Zhu +Date: Tue, 7 Sep 2021 11:32:22 -0400 +Subject: drm/amdgpu: move iommu_resume before ip init/resume + +From: James Zhu + +commit f02abeb0779700c308e661a412451b38962b8a0b upstream. + +Separate iommu_resume from kfd_resume, and move it before +other amdgpu ip init/resume. + +Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277 +Signed-off-by: James Zhu +Reviewed-by: Felix Kuehling +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -2220,6 +2220,10 @@ static int amdgpu_device_ip_init(struct + if (r) + goto init_failed; + ++ r = amdgpu_amdkfd_resume_iommu(adev); ++ if (r) ++ goto init_failed; ++ + r = amdgpu_device_ip_hw_init_phase1(adev); + if (r) + goto init_failed; +@@ -2913,6 +2917,10 @@ static int amdgpu_device_ip_resume(struc + { + int r; + ++ r = amdgpu_amdkfd_resume_iommu(adev); ++ if (r) ++ return r; ++ + r = amdgpu_device_ip_resume_phase1(adev); + if (r) + return r; +@@ -4296,6 +4304,10 @@ static int amdgpu_do_asic_reset(struct a + + if (!r) { + dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); ++ r = amdgpu_amdkfd_resume_iommu(tmp_adev); ++ if (r) ++ goto out; ++ + r = amdgpu_device_ip_resume_phase1(tmp_adev); + if (r) + goto out; diff --git a/queue-5.10/drm-amdkfd-fix-boot-failure-when-iommu-is-disabled-in-picasso.patch b/queue-5.10/drm-amdkfd-fix-boot-failure-when-iommu-is-disabled-in-picasso.patch new file mode 100644 index 00000000000..212c1f408e2 --- /dev/null +++ b/queue-5.10/drm-amdkfd-fix-boot-failure-when-iommu-is-disabled-in-picasso.patch @@ -0,0 +1,48 @@ +From afd18180c07026f94a80ff024acef5f4159084a4 Mon Sep 17 00:00:00 2001 +From: Yifan Zhang +Date: Mon, 11 Oct 2021 20:37:01 +0800 +Subject: drm/amdkfd: fix boot failure when iommu is disabled in Picasso. + +From: Yifan Zhang + +commit afd18180c07026f94a80ff024acef5f4159084a4 upstream. + +When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2 +init will fail. But this failure should not block amdgpu driver init. + +Reported-by: youling +Tested-by: youling +Signed-off-by: Yifan Zhang +Reviewed-by: James Zhu +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 +++ + 2 files changed, 3 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct + amdgpu_xgmi_add_device(adev); + amdgpu_amdkfd_device_init(adev); + +- r = amdgpu_amdkfd_resume_iommu(adev); +- if (r) +- goto init_failed; +- + amdgpu_fru_get_product_info(adev); + + init_failed: +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev + + kfd_cwsr_init(kfd); + ++ if(kgd2kfd_resume_iommu(kfd)) ++ goto device_iommu_error; ++ + if (kfd_resume(kfd)) + goto kfd_resume_error; + diff --git a/queue-5.10/drm-amdkfd-separate-kfd_iommu_resume-from-kfd_resume.patch b/queue-5.10/drm-amdkfd-separate-kfd_iommu_resume-from-kfd_resume.patch new file mode 100644 index 00000000000..0d385cc3daa --- /dev/null +++ b/queue-5.10/drm-amdkfd-separate-kfd_iommu_resume-from-kfd_resume.patch @@ -0,0 +1,75 @@ +From fefc01f042f44ede373ee66773b8238dd8fdcb55 Mon Sep 17 00:00:00 2001 +From: James Zhu +Date: Tue, 7 Sep 2021 11:13:02 -0400 +Subject: drm/amdkfd: separate kfd_iommu_resume from kfd_resume + +From: James Zhu + +commit fefc01f042f44ede373ee66773b8238dd8fdcb55 upstream. + +Separate kfd_iommu_resume from kfd_resume for fine-tuning +of amdgpu device init/resume/reset/recovery sequence. + +v2: squash in fix for !CONFIG_HSA_AMD + +Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277 +Signed-off-by: James Zhu +Reviewed-by: Felix Kuehling +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 6 ++++++ + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 12 ++++++++---- + 2 files changed, 14 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +@@ -305,6 +305,7 @@ bool kgd2kfd_device_init(struct kfd_dev + const struct kgd2kfd_shared_resources *gpu_resources); + void kgd2kfd_device_exit(struct kfd_dev *kfd); + void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm); ++int kgd2kfd_resume_iommu(struct kfd_dev *kfd); + int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm); + int kgd2kfd_pre_reset(struct kfd_dev *kfd); + int kgd2kfd_post_reset(struct kfd_dev *kfd); +@@ -343,6 +344,11 @@ static inline void kgd2kfd_suspend(struc + { + } + ++static int __maybe_unused kgd2kfd_resume_iommu(struct kfd_dev *kfd) ++{ ++ return 0; ++} ++ + static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) + { + return 0; +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -896,17 +896,21 @@ int kgd2kfd_resume(struct kfd_dev *kfd, + return ret; + } + +-static int kfd_resume(struct kfd_dev *kfd) ++int kgd2kfd_resume_iommu(struct kfd_dev *kfd) + { + int err = 0; + + err = kfd_iommu_resume(kfd); +- if (err) { ++ if (err) + dev_err(kfd_device, + "Failed to resume IOMMU for device %x:%x\n", + kfd->pdev->vendor, kfd->pdev->device); +- return err; +- } ++ return err; ++} ++ ++static int kfd_resume(struct kfd_dev *kfd) ++{ ++ int err = 0; + + err = kfd->dqm->ops.start(kfd->dqm); + if (err) { diff --git a/queue-5.10/series b/queue-5.10/series index 6b4d28d4fee..34484b0a929 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -18,3 +18,9 @@ can-kvaser_usb-get-can-clock-frequency-from-device.patch can-kvaser_pciefd-kvaser_pciefd_rx_error_frame-increase-correct-stats-rx-tx-_errors-counter.patch can-sja1000-fix-use-after-free-in-ems_pcmcia_add_card.patch x86-sme-explicitly-map-new-efi-memmap-table-as-encrypted.patch +drm-amd-amdkfd-adjust-dummy-functions-placement.patch +drm-amdkfd-separate-kfd_iommu_resume-from-kfd_resume.patch +drm-amdgpu-add-amdgpu_amdkfd_resume_iommu.patch +drm-amdgpu-move-iommu_resume-before-ip-init-resume.patch +drm-amdgpu-init-iommu-after-amdkfd-device-init.patch +drm-amdkfd-fix-boot-failure-when-iommu-is-disabled-in-picasso.patch -- 2.47.2