From: Greg Kroah-Hartman Date: Tue, 28 Mar 2023 13:32:37 +0000 (+0200) Subject: 6.1-stable patches X-Git-Tag: v5.15.105~11 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=47f79eb60fb80791a1b7354dd9f9ebe18f75e86c;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: drm-amdkfd-add-gc-11.0.4-kfd-support.patch drm-amdkfd-fix-the-warning-of-array-index-out-of-bounds.patch drm-amdkfd-introduce-dummy-cache-info-for-property-asic.patch --- diff --git a/queue-6.1/drm-amdkfd-add-gc-11.0.4-kfd-support.patch b/queue-6.1/drm-amdkfd-add-gc-11.0.4-kfd-support.patch new file mode 100644 index 00000000000..f1472cb0680 --- /dev/null +++ b/queue-6.1/drm-amdkfd-add-gc-11.0.4-kfd-support.patch @@ -0,0 +1,49 @@ +From 88c21c2b56aa21dd34290d43ada74033dc3bfe35 Mon Sep 17 00:00:00 2001 +From: Yifan Zhang +Date: Wed, 12 Oct 2022 13:01:22 +0800 +Subject: drm/amdkfd: add GC 11.0.4 KFD support + +From: Yifan Zhang + +commit 88c21c2b56aa21dd34290d43ada74033dc3bfe35 upstream. + +Add initial support for GC 11.0.4 in KFD compute driver. + +Signed-off-by: Yifan Zhang +Reviewed-by: Aaron Liu +Signed-off-by: Alex Deucher +Cc: "Limonciello, Mario" +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 1 + + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 ++ + 2 files changed, 3 insertions(+) + +--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +@@ -1522,6 +1522,7 @@ int kfd_get_gpu_cache_info(struct kfd_de + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): ++ case IP_VERSION(11, 0, 4): + num_of_cache_types = + kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info); + break; +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -154,6 +154,7 @@ static void kfd_device_info_set_event_in + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): ++ case IP_VERSION(11, 0, 4): + kfd->device_info.event_interrupt_class = &event_interrupt_class_v11; + break; + default: +@@ -396,6 +397,7 @@ struct kfd_dev *kgd2kfd_probe(struct amd + f2g = &gfx_v11_kfd2kgd; + break; + case IP_VERSION(11, 0, 1): ++ case IP_VERSION(11, 0, 4): + gfx_target_version = 110003; + f2g = &gfx_v11_kfd2kgd; + break; diff --git a/queue-6.1/drm-amdkfd-fix-the-warning-of-array-index-out-of-bounds.patch b/queue-6.1/drm-amdkfd-fix-the-warning-of-array-index-out-of-bounds.patch new file mode 100644 index 00000000000..07d05c61d2a --- /dev/null +++ b/queue-6.1/drm-amdkfd-fix-the-warning-of-array-index-out-of-bounds.patch @@ -0,0 +1,853 @@ +From c0cc999f3c32e65a7c88fb323893ddf897b24488 Mon Sep 17 00:00:00 2001 +From: Ma Jun +Date: Wed, 2 Nov 2022 15:53:26 +0800 +Subject: drm/amdkfd: Fix the warning of array-index-out-of-bounds + +From: Ma Jun + +commit c0cc999f3c32e65a7c88fb323893ddf897b24488 upstream. + +For some GPUs with more CUs, the original sibling_map[32] +in struct crat_subtype_cache is not enough +to save the cache information when create the VCRAT table, +so skip filling the struct crat_subtype_cache info instead +fill struct kfd_cache_properties directly to fix this problem. 
+ +Signed-off-by: Ma Jun +Reviewed-by: Felix Kuehling +Signed-off-by: Alex Deucher +Cc: "Limonciello, Mario" +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 312 +++--------------------------- + drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 12 + + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 245 ++++++++++++++++++++++- + drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 5 + 4 files changed, 282 insertions(+), 292 deletions(-) + +--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +@@ -50,16 +50,6 @@ static inline unsigned int get_and_inc_g + return current_id; + } + +-/* Static table to describe GPU Cache information */ +-struct kfd_gpu_cache_info { +- uint32_t cache_size; +- uint32_t cache_level; +- uint32_t flags; +- /* Indicates how many Compute Units share this cache +- * within a SA. Value = 1 indicates the cache is not shared +- */ +- uint32_t num_cu_shared; +-}; + + static struct kfd_gpu_cache_info kaveri_cache_info[] = { + { +@@ -1119,9 +1109,13 @@ static int kfd_parse_subtype_cache(struc + props->cachelines_per_tag = cache->lines_per_tag; + props->cache_assoc = cache->associativity; + props->cache_latency = cache->cache_latency; ++ + memcpy(props->sibling_map, cache->sibling_map, + sizeof(props->sibling_map)); + ++ /* set the sibling_map_size as 32 for CRAT from ACPI */ ++ props->sibling_map_size = CRAT_SIBLINGMAP_SIZE; ++ + if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) + props->cache_type |= HSA_CACHE_TYPE_DATA; + if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE) +@@ -1339,125 +1333,6 @@ err: + return ret; + } + +-/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ +-static int fill_in_l1_pcache(struct crat_subtype_cache *pcache, +- struct kfd_gpu_cache_info *pcache_info, +- struct kfd_cu_info *cu_info, +- int mem_available, +- int cu_bitmask, +- int cache_type, unsigned int cu_processor_id, +- int cu_block) +-{ +- unsigned int cu_sibling_map_mask; +- int first_active_cu; +- +- /* First check if enough memory is available */ +- if (sizeof(struct crat_subtype_cache) > mem_available) +- return -ENOMEM; +- +- cu_sibling_map_mask = cu_bitmask; +- cu_sibling_map_mask >>= cu_block; +- cu_sibling_map_mask &= +- ((1 << pcache_info[cache_type].num_cu_shared) - 1); +- first_active_cu = ffs(cu_sibling_map_mask); +- +- /* CU could be inactive. In case of shared cache find the first active +- * CU. and incase of non-shared cache check if the CU is inactive. If +- * inactive active skip it +- */ +- if (first_active_cu) { +- memset(pcache, 0, sizeof(struct crat_subtype_cache)); +- pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY; +- pcache->length = sizeof(struct crat_subtype_cache); +- pcache->flags = pcache_info[cache_type].flags; +- pcache->processor_id_low = cu_processor_id +- + (first_active_cu - 1); +- pcache->cache_level = pcache_info[cache_type].cache_level; +- pcache->cache_size = pcache_info[cache_type].cache_size; +- +- /* Sibling map is w.r.t processor_id_low, so shift out +- * inactive CU +- */ +- cu_sibling_map_mask = +- cu_sibling_map_mask >> (first_active_cu - 1); +- +- pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); +- pcache->sibling_map[1] = +- (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); +- pcache->sibling_map[2] = +- (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); +- pcache->sibling_map[3] = +- (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); +- return 0; +- } +- return 1; +-} +- +-/* Helper function. 
See kfd_fill_gpu_cache_info for parameter description */ +-static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache, +- struct kfd_gpu_cache_info *pcache_info, +- struct kfd_cu_info *cu_info, +- int mem_available, +- int cache_type, unsigned int cu_processor_id) +-{ +- unsigned int cu_sibling_map_mask; +- int first_active_cu; +- int i, j, k; +- +- /* First check if enough memory is available */ +- if (sizeof(struct crat_subtype_cache) > mem_available) +- return -ENOMEM; +- +- cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; +- cu_sibling_map_mask &= +- ((1 << pcache_info[cache_type].num_cu_shared) - 1); +- first_active_cu = ffs(cu_sibling_map_mask); +- +- /* CU could be inactive. In case of shared cache find the first active +- * CU. and incase of non-shared cache check if the CU is inactive. If +- * inactive active skip it +- */ +- if (first_active_cu) { +- memset(pcache, 0, sizeof(struct crat_subtype_cache)); +- pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY; +- pcache->length = sizeof(struct crat_subtype_cache); +- pcache->flags = pcache_info[cache_type].flags; +- pcache->processor_id_low = cu_processor_id +- + (first_active_cu - 1); +- pcache->cache_level = pcache_info[cache_type].cache_level; +- pcache->cache_size = pcache_info[cache_type].cache_size; +- +- /* Sibling map is w.r.t processor_id_low, so shift out +- * inactive CU +- */ +- cu_sibling_map_mask = +- cu_sibling_map_mask >> (first_active_cu - 1); +- k = 0; +- for (i = 0; i < cu_info->num_shader_engines; i++) { +- for (j = 0; j < cu_info->num_shader_arrays_per_engine; +- j++) { +- pcache->sibling_map[k] = +- (uint8_t)(cu_sibling_map_mask & 0xFF); +- pcache->sibling_map[k+1] = +- (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); +- pcache->sibling_map[k+2] = +- (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); +- pcache->sibling_map[k+3] = +- (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); +- k += 4; +- cu_sibling_map_mask = +- cu_info->cu_bitmap[i % 4][j + i / 4]; +- cu_sibling_map_mask &= ( +- (1 << pcache_info[cache_type].num_cu_shared) +- - 1); +- } +- } +- return 0; +- } +- return 1; +-} +- +-#define KFD_MAX_CACHE_TYPES 6 + + static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, + struct kfd_gpu_cache_info *pcache_info) +@@ -1531,231 +1406,133 @@ static int kfd_fill_gpu_cache_info_from_ + return i; + } + +-/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info +- * tables +- * +- * @kdev - [IN] GPU device +- * @gpu_processor_id - [IN] GPU processor ID to which these caches +- * associate +- * @available_size - [IN] Amount of memory available in pcache +- * @cu_info - [IN] Compute Unit info obtained from KGD +- * @pcache - [OUT] memory into which cache data is to be filled in. +- * @size_filled - [OUT] amount of data used up in pcache. 
+- * @num_of_entries - [OUT] number of caches added +- */ +-static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, +- int gpu_processor_id, +- int available_size, +- struct kfd_cu_info *cu_info, +- struct crat_subtype_cache *pcache, +- int *size_filled, +- int *num_of_entries) ++int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info) + { +- struct kfd_gpu_cache_info *pcache_info; +- struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; + int num_of_cache_types = 0; +- int i, j, k; +- int ct = 0; +- int mem_available = available_size; +- unsigned int cu_processor_id; +- int ret; +- unsigned int num_cu_shared; + + switch (kdev->adev->asic_type) { + case CHIP_KAVERI: +- pcache_info = kaveri_cache_info; ++ *pcache_info = kaveri_cache_info; + num_of_cache_types = ARRAY_SIZE(kaveri_cache_info); + break; + case CHIP_HAWAII: +- pcache_info = hawaii_cache_info; ++ *pcache_info = hawaii_cache_info; + num_of_cache_types = ARRAY_SIZE(hawaii_cache_info); + break; + case CHIP_CARRIZO: +- pcache_info = carrizo_cache_info; ++ *pcache_info = carrizo_cache_info; + num_of_cache_types = ARRAY_SIZE(carrizo_cache_info); + break; + case CHIP_TONGA: +- pcache_info = tonga_cache_info; ++ *pcache_info = tonga_cache_info; + num_of_cache_types = ARRAY_SIZE(tonga_cache_info); + break; + case CHIP_FIJI: +- pcache_info = fiji_cache_info; ++ *pcache_info = fiji_cache_info; + num_of_cache_types = ARRAY_SIZE(fiji_cache_info); + break; + case CHIP_POLARIS10: +- pcache_info = polaris10_cache_info; ++ *pcache_info = polaris10_cache_info; + num_of_cache_types = ARRAY_SIZE(polaris10_cache_info); + break; + case CHIP_POLARIS11: +- pcache_info = polaris11_cache_info; ++ *pcache_info = polaris11_cache_info; + num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); + break; + case CHIP_POLARIS12: +- pcache_info = polaris12_cache_info; ++ *pcache_info = polaris12_cache_info; + num_of_cache_types = ARRAY_SIZE(polaris12_cache_info); + break; + case CHIP_VEGAM: +- pcache_info = vegam_cache_info; ++ *pcache_info = vegam_cache_info; + num_of_cache_types = ARRAY_SIZE(vegam_cache_info); + break; + default: + switch (KFD_GC_VERSION(kdev)) { + case IP_VERSION(9, 0, 1): +- pcache_info = vega10_cache_info; ++ *pcache_info = vega10_cache_info; + num_of_cache_types = ARRAY_SIZE(vega10_cache_info); + break; + case IP_VERSION(9, 2, 1): +- pcache_info = vega12_cache_info; ++ *pcache_info = vega12_cache_info; + num_of_cache_types = ARRAY_SIZE(vega12_cache_info); + break; + case IP_VERSION(9, 4, 0): + case IP_VERSION(9, 4, 1): +- pcache_info = vega20_cache_info; ++ *pcache_info = vega20_cache_info; + num_of_cache_types = ARRAY_SIZE(vega20_cache_info); + break; + case IP_VERSION(9, 4, 2): +- pcache_info = aldebaran_cache_info; ++ *pcache_info = aldebaran_cache_info; + num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); + break; + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 2, 2): +- pcache_info = raven_cache_info; ++ *pcache_info = raven_cache_info; + num_of_cache_types = ARRAY_SIZE(raven_cache_info); + break; + case IP_VERSION(9, 3, 0): +- pcache_info = renoir_cache_info; ++ *pcache_info = renoir_cache_info; + num_of_cache_types = ARRAY_SIZE(renoir_cache_info); + break; + case IP_VERSION(10, 1, 10): + case IP_VERSION(10, 1, 2): + case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): +- pcache_info = navi10_cache_info; ++ *pcache_info = navi10_cache_info; + num_of_cache_types = ARRAY_SIZE(navi10_cache_info); + break; + case IP_VERSION(10, 1, 1): +- pcache_info = navi14_cache_info; ++ *pcache_info = 
navi14_cache_info; + num_of_cache_types = ARRAY_SIZE(navi14_cache_info); + break; + case IP_VERSION(10, 3, 0): +- pcache_info = sienna_cichlid_cache_info; ++ *pcache_info = sienna_cichlid_cache_info; + num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info); + break; + case IP_VERSION(10, 3, 2): +- pcache_info = navy_flounder_cache_info; ++ *pcache_info = navy_flounder_cache_info; + num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info); + break; + case IP_VERSION(10, 3, 4): +- pcache_info = dimgrey_cavefish_cache_info; ++ *pcache_info = dimgrey_cavefish_cache_info; + num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info); + break; + case IP_VERSION(10, 3, 1): +- pcache_info = vangogh_cache_info; ++ *pcache_info = vangogh_cache_info; + num_of_cache_types = ARRAY_SIZE(vangogh_cache_info); + break; + case IP_VERSION(10, 3, 5): +- pcache_info = beige_goby_cache_info; ++ *pcache_info = beige_goby_cache_info; + num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info); + break; + case IP_VERSION(10, 3, 3): +- pcache_info = yellow_carp_cache_info; ++ *pcache_info = yellow_carp_cache_info; + num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); + break; + case IP_VERSION(10, 3, 6): +- pcache_info = gc_10_3_6_cache_info; ++ *pcache_info = gc_10_3_6_cache_info; + num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info); + break; + case IP_VERSION(10, 3, 7): +- pcache_info = gfx1037_cache_info; ++ *pcache_info = gfx1037_cache_info; + num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info); + break; + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): +- pcache_info = cache_info; + num_of_cache_types = +- kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info); ++ kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info); + break; + default: +- pcache_info = dummy_cache_info; ++ *pcache_info = dummy_cache_info; + num_of_cache_types = ARRAY_SIZE(dummy_cache_info); + pr_warn("dummy cache info is used temporarily and real cache info need update later.\n"); + break; + } + } +- +- *size_filled = 0; +- *num_of_entries = 0; +- +- /* For each type of cache listed in the kfd_gpu_cache_info table, +- * go through all available Compute Units. +- * The [i,j,k] loop will +- * if kfd_gpu_cache_info.num_cu_shared = 1 +- * will parse through all available CU +- * If (kfd_gpu_cache_info.num_cu_shared != 1) +- * then it will consider only one CU from +- * the shared unit +- */ +- +- for (ct = 0; ct < num_of_cache_types; ct++) { +- cu_processor_id = gpu_processor_id; +- if (pcache_info[ct].cache_level == 1) { +- for (i = 0; i < cu_info->num_shader_engines; i++) { +- for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { +- for (k = 0; k < cu_info->num_cu_per_sh; +- k += pcache_info[ct].num_cu_shared) { +- ret = fill_in_l1_pcache(pcache, +- pcache_info, +- cu_info, +- mem_available, +- cu_info->cu_bitmap[i % 4][j + i / 4], +- ct, +- cu_processor_id, +- k); +- +- if (ret < 0) +- break; +- +- if (!ret) { +- pcache++; +- (*num_of_entries)++; +- mem_available -= sizeof(*pcache); +- (*size_filled) += sizeof(*pcache); +- } +- +- /* Move to next CU block */ +- num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= +- cu_info->num_cu_per_sh) ? 
+- pcache_info[ct].num_cu_shared : +- (cu_info->num_cu_per_sh - k); +- cu_processor_id += num_cu_shared; +- } +- } +- } +- } else { +- ret = fill_in_l2_l3_pcache(pcache, +- pcache_info, +- cu_info, +- mem_available, +- ct, +- cu_processor_id); +- +- if (ret < 0) +- break; +- +- if (!ret) { +- pcache++; +- (*num_of_entries)++; +- mem_available -= sizeof(*pcache); +- (*size_filled) += sizeof(*pcache); +- } +- } +- } +- +- pr_debug("Added [%d] GPU cache entries\n", *num_of_entries); +- +- return 0; ++ return num_of_cache_types; + } + + static bool kfd_ignore_crat(void) +@@ -2314,8 +2091,6 @@ static int kfd_create_vcrat_image_gpu(vo + struct kfd_cu_info cu_info; + int avail_size = *size; + uint32_t total_num_of_cu; +- int num_of_cache_entries = 0; +- int cache_mem_filled = 0; + uint32_t nid = 0; + int ret = 0; + +@@ -2416,31 +2191,12 @@ static int kfd_create_vcrat_image_gpu(vo + crat_table->length += sizeof(struct crat_subtype_memory); + crat_table->total_entries++; + +- /* TODO: Fill in cache information. This information is NOT readily +- * available in KGD +- */ +- sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + +- sub_type_hdr->length); +- ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low, +- avail_size, +- &cu_info, +- (struct crat_subtype_cache *)sub_type_hdr, +- &cache_mem_filled, +- &num_of_cache_entries); +- +- if (ret < 0) +- return ret; +- +- crat_table->length += cache_mem_filled; +- crat_table->total_entries += num_of_cache_entries; +- avail_size -= cache_mem_filled; +- + /* Fill in Subtype: IO_LINKS + * Only direct links are added here which is Link from GPU to + * its NUMA node. Indirect links are added by userspace. + */ + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + +- cache_mem_filled); ++ sub_type_hdr->length); + ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev, + (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain); + +--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +@@ -317,6 +317,18 @@ struct cdit_header { + + struct kfd_dev; + ++/* Static table to describe GPU Cache information */ ++struct kfd_gpu_cache_info { ++ uint32_t cache_size; ++ uint32_t cache_level; ++ uint32_t flags; ++ /* Indicates how many Compute Units share this cache ++ * within a SA. 
Value = 1 indicates the cache is not shared ++ */ ++ uint32_t num_cu_shared; ++}; ++int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info); ++ + int kfd_create_crat_image_acpi(void **crat_image, size_t *size); + void kfd_destroy_crat_image(void *crat_image); + int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, +--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +@@ -364,7 +364,6 @@ static ssize_t kfd_cache_show(struct kob + + /* Making sure that the buffer is an empty string */ + buffer[0] = 0; +- + cache = container_of(attr, struct kfd_cache_properties, attr); + if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu)) + return -EPERM; +@@ -379,12 +378,13 @@ static ssize_t kfd_cache_show(struct kob + sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc); + sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency); + sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type); ++ + offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map "); +- for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++) ++ for (i = 0; i < cache->sibling_map_size; i++) + for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) + /* Check each bit */ + offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,", +- (cache->sibling_map[i] >> j) & 1); ++ (cache->sibling_map[i] >> j) & 1); + + /* Replace the last "," with end of line */ + buffer[offs-1] = '\n'; +@@ -1198,7 +1198,6 @@ static struct kfd_topology_device *kfd_a + struct kfd_iolink_properties *iolink; + struct kfd_iolink_properties *p2plink; + +- down_write(&topology_lock); + list_for_each_entry(dev, &topology_device_list, list) { + /* Discrete GPUs need their own topology device list + * entries. Don't assign them to CPU/APU nodes. +@@ -1222,7 +1221,6 @@ static struct kfd_topology_device *kfd_a + break; + } + } +- up_write(&topology_lock); + return out_dev; + } + +@@ -1593,6 +1591,221 @@ out: + return ret; + } + ++ ++/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ ++static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext, ++ struct kfd_gpu_cache_info *pcache_info, ++ struct kfd_cu_info *cu_info, ++ int cu_bitmask, ++ int cache_type, unsigned int cu_processor_id, ++ int cu_block) ++{ ++ unsigned int cu_sibling_map_mask; ++ int first_active_cu; ++ struct kfd_cache_properties *pcache = NULL; ++ ++ cu_sibling_map_mask = cu_bitmask; ++ cu_sibling_map_mask >>= cu_block; ++ cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); ++ first_active_cu = ffs(cu_sibling_map_mask); ++ ++ /* CU could be inactive. In case of shared cache find the first active ++ * CU. and incase of non-shared cache check if the CU is inactive. 
If ++ * inactive active skip it ++ */ ++ if (first_active_cu) { ++ pcache = kfd_alloc_struct(pcache); ++ if (!pcache) ++ return -ENOMEM; ++ ++ memset(pcache, 0, sizeof(struct kfd_cache_properties)); ++ pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); ++ pcache->cache_level = pcache_info[cache_type].cache_level; ++ pcache->cache_size = pcache_info[cache_type].cache_size; ++ ++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) ++ pcache->cache_type |= HSA_CACHE_TYPE_DATA; ++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE) ++ pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; ++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE) ++ pcache->cache_type |= HSA_CACHE_TYPE_CPU; ++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE) ++ pcache->cache_type |= HSA_CACHE_TYPE_HSACU; ++ ++ /* Sibling map is w.r.t processor_id_low, so shift out ++ * inactive CU ++ */ ++ cu_sibling_map_mask = ++ cu_sibling_map_mask >> (first_active_cu - 1); ++ ++ pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); ++ pcache->sibling_map[1] = ++ (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); ++ pcache->sibling_map[2] = ++ (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); ++ pcache->sibling_map[3] = ++ (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); ++ ++ pcache->sibling_map_size = 4; ++ *props_ext = pcache; ++ ++ return 0; ++ } ++ return 1; ++} ++ ++/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ ++static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, ++ struct kfd_gpu_cache_info *pcache_info, ++ struct kfd_cu_info *cu_info, ++ int cache_type, unsigned int cu_processor_id) ++{ ++ unsigned int cu_sibling_map_mask; ++ int first_active_cu; ++ int i, j, k; ++ struct kfd_cache_properties *pcache = NULL; ++ ++ cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; ++ cu_sibling_map_mask &= ++ ((1 << pcache_info[cache_type].num_cu_shared) - 1); ++ first_active_cu = ffs(cu_sibling_map_mask); ++ ++ /* CU could be inactive. In case of shared cache find the first active ++ * CU. and incase of non-shared cache check if the CU is inactive. 
If ++ * inactive active skip it ++ */ ++ if (first_active_cu) { ++ pcache = kfd_alloc_struct(pcache); ++ if (!pcache) ++ return -ENOMEM; ++ ++ memset(pcache, 0, sizeof(struct kfd_cache_properties)); ++ pcache->processor_id_low = cu_processor_id ++ + (first_active_cu - 1); ++ pcache->cache_level = pcache_info[cache_type].cache_level; ++ pcache->cache_size = pcache_info[cache_type].cache_size; ++ ++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) ++ pcache->cache_type |= HSA_CACHE_TYPE_DATA; ++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE) ++ pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; ++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE) ++ pcache->cache_type |= HSA_CACHE_TYPE_CPU; ++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE) ++ pcache->cache_type |= HSA_CACHE_TYPE_HSACU; ++ ++ /* Sibling map is w.r.t processor_id_low, so shift out ++ * inactive CU ++ */ ++ cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); ++ k = 0; ++ ++ for (i = 0; i < cu_info->num_shader_engines; i++) { ++ for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { ++ pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); ++ pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); ++ pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); ++ pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); ++ k += 4; ++ ++ cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4]; ++ cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); ++ } ++ } ++ pcache->sibling_map_size = k; ++ *props_ext = pcache; ++ return 0; ++ } ++ return 1; ++} ++ ++#define KFD_MAX_CACHE_TYPES 6 ++ ++/* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info ++ * tables ++ */ ++void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_dev *kdev) ++{ ++ struct kfd_gpu_cache_info *pcache_info = NULL; ++ int i, j, k; ++ int ct = 0; ++ unsigned int cu_processor_id; ++ int ret; ++ unsigned int num_cu_shared; ++ struct kfd_cu_info cu_info; ++ struct kfd_cu_info *pcu_info; ++ int gpu_processor_id; ++ struct kfd_cache_properties *props_ext; ++ int num_of_entries = 0; ++ int num_of_cache_types = 0; ++ struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; ++ ++ amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info); ++ pcu_info = &cu_info; ++ ++ gpu_processor_id = dev->node_props.simd_id_base; ++ ++ pcache_info = cache_info; ++ num_of_cache_types = kfd_get_gpu_cache_info(kdev, &pcache_info); ++ if (!num_of_cache_types) { ++ pr_warn("no cache info found\n"); ++ return; ++ } ++ ++ /* For each type of cache listed in the kfd_gpu_cache_info table, ++ * go through all available Compute Units. 
++ * The [i,j,k] loop will ++ * if kfd_gpu_cache_info.num_cu_shared = 1 ++ * will parse through all available CU ++ * If (kfd_gpu_cache_info.num_cu_shared != 1) ++ * then it will consider only one CU from ++ * the shared unit ++ */ ++ for (ct = 0; ct < num_of_cache_types; ct++) { ++ cu_processor_id = gpu_processor_id; ++ if (pcache_info[ct].cache_level == 1) { ++ for (i = 0; i < pcu_info->num_shader_engines; i++) { ++ for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) { ++ for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { ++ ++ ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, ++ pcu_info->cu_bitmap[i % 4][j + i / 4], ct, ++ cu_processor_id, k); ++ ++ if (ret < 0) ++ break; ++ ++ if (!ret) { ++ num_of_entries++; ++ list_add_tail(&props_ext->list, &dev->cache_props); ++ } ++ ++ /* Move to next CU block */ ++ num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= ++ pcu_info->num_cu_per_sh) ? ++ pcache_info[ct].num_cu_shared : ++ (pcu_info->num_cu_per_sh - k); ++ cu_processor_id += num_cu_shared; ++ } ++ } ++ } ++ } else { ++ ret = fill_in_l2_l3_pcache(&props_ext, pcache_info, ++ pcu_info, ct, cu_processor_id); ++ ++ if (ret < 0) ++ break; ++ ++ if (!ret) { ++ num_of_entries++; ++ list_add_tail(&props_ext->list, &dev->cache_props); ++ } ++ } ++ } ++ dev->node_props.caches_count += num_of_entries; ++ pr_debug("Added [%d] GPU cache entries\n", num_of_entries); ++} ++ + int kfd_topology_add_device(struct kfd_dev *gpu) + { + uint32_t gpu_id; +@@ -1617,9 +1830,9 @@ int kfd_topology_add_device(struct kfd_d + * CRAT to create a new topology device. Once created assign the gpu to + * that topology device + */ ++ down_write(&topology_lock); + dev = kfd_assign_gpu(gpu); + if (!dev) { +- down_write(&topology_lock); + proximity_domain = ++topology_crat_proximity_domain; + + res = kfd_create_crat_image_virtual(&crat_image, &image_size, +@@ -1631,6 +1844,7 @@ int kfd_topology_add_device(struct kfd_d + topology_crat_proximity_domain--; + return res; + } ++ + res = kfd_parse_crat_table(crat_image, + &temp_topology_device_list, + proximity_domain); +@@ -1644,23 +1858,28 @@ int kfd_topology_add_device(struct kfd_d + kfd_topology_update_device_list(&temp_topology_device_list, + &topology_device_list); + ++ dev = kfd_assign_gpu(gpu); ++ if (WARN_ON(!dev)) { ++ res = -ENODEV; ++ goto err; ++ } ++ ++ /* Fill the cache affinity information here for the GPUs ++ * using VCRAT ++ */ ++ kfd_fill_cache_non_crat_info(dev, gpu); ++ + /* Update the SYSFS tree, since we added another topology + * device + */ + res = kfd_topology_update_sysfs(); +- up_write(&topology_lock); +- + if (!res) + sys_props.generation_count++; + else + pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. 
res=%d\n", + gpu_id, res); +- dev = kfd_assign_gpu(gpu); +- if (WARN_ON(!dev)) { +- res = -ENODEV; +- goto err; +- } + } ++ up_write(&topology_lock); + + dev->gpu_id = gpu_id; + gpu->id = gpu_id; +--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +@@ -80,6 +80,8 @@ struct kfd_mem_properties { + struct attribute attr; + }; + ++#define CACHE_SIBLINGMAP_SIZE 64 ++ + struct kfd_cache_properties { + struct list_head list; + uint32_t processor_id_low; +@@ -90,10 +92,11 @@ struct kfd_cache_properties { + uint32_t cache_assoc; + uint32_t cache_latency; + uint32_t cache_type; +- uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE]; ++ uint8_t sibling_map[CACHE_SIBLINGMAP_SIZE]; + struct kfd_dev *gpu; + struct kobject *kobj; + struct attribute attr; ++ uint32_t sibling_map_size; + }; + + struct kfd_iolink_properties { diff --git a/queue-6.1/drm-amdkfd-introduce-dummy-cache-info-for-property-asic.patch b/queue-6.1/drm-amdkfd-introduce-dummy-cache-info-for-property-asic.patch new file mode 100644 index 00000000000..bb9fc4ce16f --- /dev/null +++ b/queue-6.1/drm-amdkfd-introduce-dummy-cache-info-for-property-asic.patch @@ -0,0 +1,89 @@ +From fd72e2cb2f9dd2734e8013b3e185a21f0d605d3e Mon Sep 17 00:00:00 2001 +From: Prike Liang +Date: Fri, 21 Oct 2022 16:38:48 -0400 +Subject: drm/amdkfd: introduce dummy cache info for property asic + +From: Prike Liang + +commit fd72e2cb2f9dd2734e8013b3e185a21f0d605d3e upstream. + +This dummy cache info will enable kfd base function support. + +Signed-off-by: Prike Liang +Reviewed-by: Alex Deucher +Signed-off-by: Alex Deucher +Cc: "Limonciello, Mario" +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 53 +++++++++++++++++++++++++++++++++- + 1 file changed, 52 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +@@ -891,6 +891,54 @@ static struct kfd_gpu_cache_info gc_10_3 + }, + }; + ++static struct kfd_gpu_cache_info dummy_cache_info[] = { ++ { ++ /* TCP L1 Cache per CU */ ++ .cache_size = 16, ++ .cache_level = 1, ++ .flags = (CRAT_CACHE_FLAGS_ENABLED | ++ CRAT_CACHE_FLAGS_DATA_CACHE | ++ CRAT_CACHE_FLAGS_SIMD_CACHE), ++ .num_cu_shared = 1, ++ }, ++ { ++ /* Scalar L1 Instruction Cache per SQC */ ++ .cache_size = 32, ++ .cache_level = 1, ++ .flags = (CRAT_CACHE_FLAGS_ENABLED | ++ CRAT_CACHE_FLAGS_INST_CACHE | ++ CRAT_CACHE_FLAGS_SIMD_CACHE), ++ .num_cu_shared = 2, ++ }, ++ { ++ /* Scalar L1 Data Cache per SQC */ ++ .cache_size = 16, ++ .cache_level = 1, ++ .flags = (CRAT_CACHE_FLAGS_ENABLED | ++ CRAT_CACHE_FLAGS_DATA_CACHE | ++ CRAT_CACHE_FLAGS_SIMD_CACHE), ++ .num_cu_shared = 2, ++ }, ++ { ++ /* GL1 Data Cache per SA */ ++ .cache_size = 128, ++ .cache_level = 1, ++ .flags = (CRAT_CACHE_FLAGS_ENABLED | ++ CRAT_CACHE_FLAGS_DATA_CACHE | ++ CRAT_CACHE_FLAGS_SIMD_CACHE), ++ .num_cu_shared = 6, ++ }, ++ { ++ /* L2 Data Cache per GPU (Total Tex Cache) */ ++ .cache_size = 2048, ++ .cache_level = 2, ++ .flags = (CRAT_CACHE_FLAGS_ENABLED | ++ CRAT_CACHE_FLAGS_DATA_CACHE | ++ CRAT_CACHE_FLAGS_SIMD_CACHE), ++ .num_cu_shared = 6, ++ }, ++}; ++ + static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, + struct crat_subtype_computeunit *cu) + { +@@ -1630,7 +1678,10 @@ static int kfd_fill_gpu_cache_info(struc + kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info); + break; + default: +- return -EINVAL; ++ pcache_info = dummy_cache_info; ++ num_of_cache_types = ARRAY_SIZE(dummy_cache_info); ++ pr_warn("dummy cache info is used 
temporarily and real cache info need update later.\n"); ++ break; + } + } + diff --git a/queue-6.1/series b/queue-6.1/series index 1e002bb317f..744be745ff4 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -218,3 +218,6 @@ dm-crypt-add-cond_resched-to-dmcrypt_write.patch dm-crypt-avoid-accessing-uninitialized-tasklet.patch sched-fair-sanitize-vruntime-of-entity-being-placed.patch sched-fair-sanitize-vruntime-of-entity-being-migrated.patch +drm-amdkfd-introduce-dummy-cache-info-for-property-asic.patch +drm-amdkfd-fix-the-warning-of-array-index-out-of-bounds.patch +drm-amdkfd-add-gc-11.0.4-kfd-support.patch
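
A note on the sibling-map packing used in the array-index-out-of-bounds fix above: each cache entry records which compute units share it as a bitmask, shifted so that bit 0 corresponds to the first active CU, then stored byte by byte. The L2/L3 path writes 4 bytes per (shader engine, shader array) pair, which is why a fixed sibling_map[32] could overflow on GPUs with more CUs, and why the patch widens the array to CACHE_SIBLINGMAP_SIZE (64) and records the bytes actually used in sibling_map_size. The standalone C sketch below mirrors the mask/shift steps visible in fill_in_l1_pcache(); the helper name, struct, and test bitmap are illustrative only, not part of the kernel API.

#include <stdint.h>
#include <stdio.h>
#include <strings.h>	/* ffs() */

/* Illustrative stand-in for one kfd_gpu_cache_info entry. */
struct cache_desc {
	unsigned int num_cu_shared;	/* CUs sharing this cache within a SA */
};

/*
 * Mirror the mask/shift logic of fill_in_l1_pcache(): pick the CU block,
 * keep only the CUs that can share this cache, locate the first active
 * CU, then store the mask relative to it, one byte at a time.
 * Returns 0 if a map was written, 1 if every CU in the block is inactive.
 */
static int pack_l1_sibling_map(uint32_t cu_bitmap, int cu_block,
			       const struct cache_desc *desc,
			       uint8_t sibling_map[4])
{
	uint32_t mask = cu_bitmap >> cu_block;
	int first_active_cu;

	mask &= (1u << desc->num_cu_shared) - 1;
	first_active_cu = ffs(mask);
	if (!first_active_cu)
		return 1;	/* inactive block: no cache entry emitted */

	/* Sibling map is w.r.t. processor_id_low, so shift out inactive CUs. */
	mask >>= first_active_cu - 1;
	sibling_map[0] = (uint8_t)(mask & 0xFF);
	sibling_map[1] = (uint8_t)((mask >> 8) & 0xFF);
	sibling_map[2] = (uint8_t)((mask >> 16) & 0xFF);
	sibling_map[3] = (uint8_t)((mask >> 24) & 0xFF);
	return 0;
}

int main(void)
{
	/* Hypothetical bitmap: CUs 1-3 active in a block of 4 shared CUs. */
	const struct cache_desc l1 = { .num_cu_shared = 4 };
	uint8_t map[4] = { 0 };

	if (pack_l1_sibling_map(0x0000000eu, 0, &l1, map) == 0)
		printf("sibling_map[0] = 0x%02x\n", map[0]);	/* prints 0x07 */
	return 0;
}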