From 3ed181b8ff43fc03c85d43a726e578bd131d39e4 Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Wed, 1 May 2024 17:07:59 -0400 Subject: [PATCH] drm/amdkfd: Ensure gpu_id is unique gpu_id needs to be unique for user space to identify GPUs via KFD interface. In the current implementation there is a very small probability of having non unique gpu_ids. v2: Add check to confirm if gpu_id is unique. If not unique, find one Changed commit header to reflect the above v3: Use crc16 as suggested-by: Lijo Lazar Ensure that gpu_id != 0 Signed-off-by: Harish Kasiviswanathan Reviewed-by: Lijo Lazar Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 40 +++++++++++++++++++---- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 219dcf504f24b..6f89b06f89d38 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "kfd_priv.h" #include "kfd_crat.h" @@ -1091,14 +1092,17 @@ void kfd_topology_shutdown(void) static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu) { - uint32_t hashout; + uint32_t gpu_id; uint32_t buf[8]; uint64_t local_mem_size; - int i; + struct kfd_topology_device *dev; + bool is_unique; + uint8_t *crc_buf; if (!gpu) return 0; + crc_buf = (uint8_t *)&buf; local_mem_size = gpu->local_mem_info.local_mem_size_private + gpu->local_mem_info.local_mem_size_public; buf[0] = gpu->adev->pdev->devfn; @@ -1111,10 +1115,34 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu) buf[6] = upper_32_bits(local_mem_size); buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16); - for (i = 0, hashout = 0; i < 8; i++) - hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH); + gpu_id = crc16(0, crc_buf, sizeof(buf)) & + ((1 << KFD_GPU_ID_HASH_WIDTH) - 1); - return hashout; + /* There is a very small possibility when generating a + * 16 (KFD_GPU_ID_HASH_WIDTH) bit value from 8 word buffer + * that the value could be 0 or non-unique. So, check if + * it is unique and non-zero. If not unique increment till + * unique one is found. In case of overflow, restart from 1 + */ + + down_read(&topology_lock); + do { + is_unique = true; + if (!gpu_id) + gpu_id = 1; + list_for_each_entry(dev, &topology_device_list, list) { + if (dev->gpu && dev->gpu_id == gpu_id) { + is_unique = false; + break; + } + } + if (unlikely(!is_unique)) + gpu_id = (gpu_id + 1) & + ((1 << KFD_GPU_ID_HASH_WIDTH) - 1); + } while (!is_unique); + up_read(&topology_lock); + + return gpu_id; } /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If * the GPU device is not already present in the topology device @@ -1945,7 +1973,6 @@ int kfd_topology_add_device(struct kfd_node *gpu) struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config; struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info; - gpu_id = kfd_generate_gpu_id(gpu); if (gpu->xcp && !gpu->xcp->ddev) { dev_warn(gpu->adev->dev, "Won't add GPU to topology since it has no drm node assigned."); @@ -1968,6 +1995,7 @@ int kfd_topology_add_device(struct kfd_node *gpu) if (res) return res; + gpu_id = kfd_generate_gpu_id(gpu); dev->gpu_id = gpu_id; gpu->id = gpu_id; -- 2.47.3