From: Shane Xiao Date: Thu, 10 Apr 2025 04:35:15 +0000 (+0800) Subject: drm/amdkfd: Add rec SDMA engines support with limited XGMI X-Git-Tag: v6.16-rc1~144^2~10^2~218 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c3abed53ca13f658c08727bd594f3874d216bde5;p=thirdparty%2Fkernel%2Flinux.git drm/amdkfd: Add rec SDMA engines support with limited XGMI This patch adds recommended SDMA engines with limited XGMI SDMA engines. It will help improve overall performance for device to device copies with this optimization. v2: Update the formatting issues and data type Signed-off-by: Shane Xiao Suggested-by: Jonathan Kim Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 9bbee484d57cc..baa2374acdeb5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1267,34 +1267,41 @@ static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev, { struct kfd_node *gpu = outbound_link->gpu; struct amdgpu_device *adev = gpu->adev; - int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; + unsigned int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; + unsigned int num_xgmi_sdma_engines = kfd_get_num_xgmi_sdma_engines(gpu); + unsigned int num_sdma_engines = kfd_get_num_sdma_engines(gpu); + uint32_t sdma_eng_id_mask = (1 << num_sdma_engines) - 1; + uint32_t xgmi_sdma_eng_id_mask = + ((1 << num_xgmi_sdma_engines) - 1) << num_sdma_engines; + bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu && adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 && - kfd_get_num_xgmi_sdma_engines(gpu) >= 14 && - (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8); + num_xgmi_sdma_engines >= 6 && (!(adev->flags & AMD_IS_APU) && + num_xgmi_nodes == 8); if (support_rec_eng) { int src_socket_id = adev->gmc.xgmi.physical_node_id; int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id; + unsigned int reshift = num_xgmi_sdma_engines == 6 ? 1 : 0; outbound_link->rec_sdma_eng_id_mask = - 1 << rec_sdma_eng_map[src_socket_id][dst_socket_id]; + 1 << (rec_sdma_eng_map[src_socket_id][dst_socket_id] >> reshift); inbound_link->rec_sdma_eng_id_mask = - 1 << rec_sdma_eng_map[dst_socket_id][src_socket_id]; - } else { - int num_sdma_eng = kfd_get_num_sdma_engines(gpu); - int i, eng_offset = 0; + 1 << (rec_sdma_eng_map[dst_socket_id][src_socket_id] >> reshift); - if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && - kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) { - eng_offset = num_sdma_eng; - num_sdma_eng = kfd_get_num_xgmi_sdma_engines(gpu); - } + /* If recommended engine is out of range, need to reset the mask */ + if (outbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask) + outbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask; + if (inbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask) + inbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask; - for (i = 0; i < num_sdma_eng; i++) { - outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); - inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); - } + } else { + uint32_t engine_mask = (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && + num_xgmi_sdma_engines && to_dev->gpu) ? xgmi_sdma_eng_id_mask : + sdma_eng_id_mask; + + outbound_link->rec_sdma_eng_id_mask = engine_mask; + inbound_link->rec_sdma_eng_id_mask = engine_mask; } }