From: Greg Kroah-Hartman Date: Tue, 28 Mar 2023 13:32:37 +0000 (+0200) Subject: 6.1-stable patches X-Git-Tag: v5.15.105~11 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=47f79eb60fb80791a1b7354dd9f9ebe18f75e86c;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: drm-amdkfd-add-gc-11.0.4-kfd-support.patch drm-amdkfd-fix-the-warning-of-array-index-out-of-bounds.patch drm-amdkfd-introduce-dummy-cache-info-for-property-asic.patch --- diff --git a/queue-6.1/drm-amdkfd-add-gc-11.0.4-kfd-support.patch b/queue-6.1/drm-amdkfd-add-gc-11.0.4-kfd-support.patch new file mode 100644 index 00000000000..f1472cb0680 --- /dev/null +++ b/queue-6.1/drm-amdkfd-add-gc-11.0.4-kfd-support.patch @@ -0,0 +1,49 @@ +From 88c21c2b56aa21dd34290d43ada74033dc3bfe35 Mon Sep 17 00:00:00 2001 +From: Yifan Zhang +Date: Wed, 12 Oct 2022 13:01:22 +0800 +Subject: drm/amdkfd: add GC 11.0.4 KFD support + +From: Yifan Zhang + +commit 88c21c2b56aa21dd34290d43ada74033dc3bfe35 upstream. + +Add initial support for GC 11.0.4 in KFD compute driver. + +Signed-off-by: Yifan Zhang +Reviewed-by: Aaron Liu +Signed-off-by: Alex Deucher +Cc: "Limonciello, Mario" +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 1 + + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 ++ + 2 files changed, 3 insertions(+) + +--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +@@ -1522,6 +1522,7 @@ int kfd_get_gpu_cache_info(struct kfd_de + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): ++ case IP_VERSION(11, 0, 4): + num_of_cache_types = + kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info); + break; +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -154,6 +154,7 @@ static void kfd_device_info_set_event_in + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): ++ case IP_VERSION(11, 0, 4): + kfd->device_info.event_interrupt_class = &event_interrupt_class_v11; + break; + default: +@@ -396,6 +397,7 @@ struct kfd_dev *kgd2kfd_probe(struct amd + f2g = &gfx_v11_kfd2kgd; + break; + case IP_VERSION(11, 0, 1): ++ case IP_VERSION(11, 0, 4): + gfx_target_version = 110003; + f2g = &gfx_v11_kfd2kgd; + break; diff --git a/queue-6.1/drm-amdkfd-fix-the-warning-of-array-index-out-of-bounds.patch b/queue-6.1/drm-amdkfd-fix-the-warning-of-array-index-out-of-bounds.patch new file mode 100644 index 00000000000..07d05c61d2a --- /dev/null +++ b/queue-6.1/drm-amdkfd-fix-the-warning-of-array-index-out-of-bounds.patch @@ -0,0 +1,853 @@ +From c0cc999f3c32e65a7c88fb323893ddf897b24488 Mon Sep 17 00:00:00 2001 +From: Ma Jun +Date: Wed, 2 Nov 2022 15:53:26 +0800 +Subject: drm/amdkfd: Fix the warning of array-index-out-of-bounds + +From: Ma Jun + +commit c0cc999f3c32e65a7c88fb323893ddf897b24488 upstream. + +For some GPUs with more CUs, the original sibling_map[32] +in struct crat_subtype_cache is not enough +to save the cache information when create the VCRAT table, +so skip filling the struct crat_subtype_cache info instead +fill struct kfd_cache_properties directly to fix this problem. 
+ +Signed-off-by: Ma Jun +Reviewed-by: Felix Kuehling +Signed-off-by: Alex Deucher +Cc: "Limonciello, Mario" +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 312 +++--------------------------- + drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 12 + + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 245 ++++++++++++++++++++++- + drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 5 + 4 files changed, 282 insertions(+), 292 deletions(-) + +--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +@@ -50,16 +50,6 @@ static inline unsigned int get_and_inc_g + return current_id; + } + +-/* Static table to describe GPU Cache information */ +-struct kfd_gpu_cache_info { +- uint32_t cache_size; +- uint32_t cache_level; +- uint32_t flags; +- /* Indicates how many Compute Units share this cache +- * within a SA. Value = 1 indicates the cache is not shared +- */ +- uint32_t num_cu_shared; +-}; + + static struct kfd_gpu_cache_info kaveri_cache_info[] = { + { +@@ -1119,9 +1109,13 @@ static int kfd_parse_subtype_cache(struc + props->cachelines_per_tag = cache->lines_per_tag; + props->cache_assoc = cache->associativity; + props->cache_latency = cache->cache_latency; ++ + memcpy(props->sibling_map, cache->sibling_map, + sizeof(props->sibling_map)); + ++ /* set the sibling_map_size as 32 for CRAT from ACPI */ ++ props->sibling_map_size = CRAT_SIBLINGMAP_SIZE; ++ + if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) + props->cache_type |= HSA_CACHE_TYPE_DATA; + if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE) +@@ -1339,125 +1333,6 @@ err: + return ret; + } + +-/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ +-static int fill_in_l1_pcache(struct crat_subtype_cache *pcache, +- struct kfd_gpu_cache_info *pcache_info, +- struct kfd_cu_info *cu_info, +- int mem_available, +- int cu_bitmask, +- int cache_type, unsigned int cu_processor_id, +- int cu_block) +-{ +- unsigned int cu_sibling_map_mask; +- int first_active_cu; +- +- /* First check if enough memory is available */ +- if (sizeof(struct crat_subtype_cache) > mem_available) +- return -ENOMEM; +- +- cu_sibling_map_mask = cu_bitmask; +- cu_sibling_map_mask >>= cu_block; +- cu_sibling_map_mask &= +- ((1 << pcache_info[cache_type].num_cu_shared) - 1); +- first_active_cu = ffs(cu_sibling_map_mask); +- +- /* CU could be inactive. In case of shared cache find the first active +- * CU. and incase of non-shared cache check if the CU is inactive. If +- * inactive active skip it +- */ +- if (first_active_cu) { +- memset(pcache, 0, sizeof(struct crat_subtype_cache)); +- pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY; +- pcache->length = sizeof(struct crat_subtype_cache); +- pcache->flags = pcache_info[cache_type].flags; +- pcache->processor_id_low = cu_processor_id +- + (first_active_cu - 1); +- pcache->cache_level = pcache_info[cache_type].cache_level; +- pcache->cache_size = pcache_info[cache_type].cache_size; +- +- /* Sibling map is w.r.t processor_id_low, so shift out +- * inactive CU +- */ +- cu_sibling_map_mask = +- cu_sibling_map_mask >> (first_active_cu - 1); +- +- pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); +- pcache->sibling_map[1] = +- (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); +- pcache->sibling_map[2] = +- (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); +- pcache->sibling_map[3] = +- (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); +- return 0; +- } +- return 1; +-} +- +-/* Helper function. 
See kfd_fill_gpu_cache_info for parameter description */ +-static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache, +- struct kfd_gpu_cache_info *pcache_info, +- struct kfd_cu_info *cu_info, +- int mem_available, +- int cache_type, unsigned int cu_processor_id) +-{ +- unsigned int cu_sibling_map_mask; +- int first_active_cu; +- int i, j, k; +- +- /* First check if enough memory is available */ +- if (sizeof(struct crat_subtype_cache) > mem_available) +- return -ENOMEM; +- +- cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; +- cu_sibling_map_mask &= +- ((1 << pcache_info[cache_type].num_cu_shared) - 1); +- first_active_cu = ffs(cu_sibling_map_mask); +- +- /* CU could be inactive. In case of shared cache find the first active +- * CU. and incase of non-shared cache check if the CU is inactive. If +- * inactive active skip it +- */ +- if (first_active_cu) { +- memset(pcache, 0, sizeof(struct crat_subtype_cache)); +- pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY; +- pcache->length = sizeof(struct crat_subtype_cache); +- pcache->flags = pcache_info[cache_type].flags; +- pcache->processor_id_low = cu_processor_id +- + (first_active_cu - 1); +- pcache->cache_level = pcache_info[cache_type].cache_level; +- pcache->cache_size = pcache_info[cache_type].cache_size; +- +- /* Sibling map is w.r.t processor_id_low, so shift out +- * inactive CU +- */ +- cu_sibling_map_mask = +- cu_sibling_map_mask >> (first_active_cu - 1); +- k = 0; +- for (i = 0; i < cu_info->num_shader_engines; i++) { +- for (j = 0; j < cu_info->num_shader_arrays_per_engine; +- j++) { +- pcache->sibling_map[k] = +- (uint8_t)(cu_sibling_map_mask & 0xFF); +- pcache->sibling_map[k+1] = +- (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); +- pcache->sibling_map[k+2] = +- (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); +- pcache->sibling_map[k+3] = +- (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); +- k += 4; +- cu_sibling_map_mask = +- cu_info->cu_bitmap[i % 4][j + i / 4]; +- cu_sibling_map_mask &= ( +- (1 << pcache_info[cache_type].num_cu_shared) +- - 1); +- } +- } +- return 0; +- } +- return 1; +-} +- +-#define KFD_MAX_CACHE_TYPES 6 + + static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, + struct kfd_gpu_cache_info *pcache_info) +@@ -1531,231 +1406,133 @@ static int kfd_fill_gpu_cache_info_from_ + return i; + } + +-/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info +- * tables +- * +- * @kdev - [IN] GPU device +- * @gpu_processor_id - [IN] GPU processor ID to which these caches +- * associate +- * @available_size - [IN] Amount of memory available in pcache +- * @cu_info - [IN] Compute Unit info obtained from KGD +- * @pcache - [OUT] memory into which cache data is to be filled in. +- * @size_filled - [OUT] amount of data used up in pcache. 
+- * @num_of_entries - [OUT] number of caches added +- */ +-static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, +- int gpu_processor_id, +- int available_size, +- struct kfd_cu_info *cu_info, +- struct crat_subtype_cache *pcache, +- int *size_filled, +- int *num_of_entries) ++int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info) + { +- struct kfd_gpu_cache_info *pcache_info; +- struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; + int num_of_cache_types = 0; +- int i, j, k; +- int ct = 0; +- int mem_available = available_size; +- unsigned int cu_processor_id; +- int ret; +- unsigned int num_cu_shared; + + switch (kdev->adev->asic_type) { + case CHIP_KAVERI: +- pcache_info = kaveri_cache_info; ++ *pcache_info = kaveri_cache_info; + num_of_cache_types = ARRAY_SIZE(kaveri_cache_info); + break; + case CHIP_HAWAII: +- pcache_info = hawaii_cache_info; ++ *pcache_info = hawaii_cache_info; + num_of_cache_types = ARRAY_SIZE(hawaii_cache_info); + break; + case CHIP_CARRIZO: +- pcache_info = carrizo_cache_info; ++ *pcache_info = carrizo_cache_info; + num_of_cache_types = ARRAY_SIZE(carrizo_cache_info); + break; + case CHIP_TONGA: +- pcache_info = tonga_cache_info; ++ *pcache_info = tonga_cache_info; + num_of_cache_types = ARRAY_SIZE(tonga_cache_info); + break; + case CHIP_FIJI: +- pcache_info = fiji_cache_info; ++ *pcache_info = fiji_cache_info; + num_of_cache_types = ARRAY_SIZE(fiji_cache_info); + break; + case CHIP_POLARIS10: +- pcache_info = polaris10_cache_info; ++ *pcache_info = polaris10_cache_info; + num_of_cache_types = ARRAY_SIZE(polaris10_cache_info); + break; + case CHIP_POLARIS11: +- pcache_info = polaris11_cache_info; ++ *pcache_info = polaris11_cache_info; + num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); + break; + case CHIP_POLARIS12: +- pcache_info = polaris12_cache_info; ++ *pcache_info = polaris12_cache_info; + num_of_cache_types = ARRAY_SIZE(polaris12_cache_info); + break; + case CHIP_VEGAM: +- pcache_info = vegam_cache_info; ++ *pcache_info = vegam_cache_info; + num_of_cache_types = ARRAY_SIZE(vegam_cache_info); + break; + default: + switch (KFD_GC_VERSION(kdev)) { + case IP_VERSION(9, 0, 1): +- pcache_info = vega10_cache_info; ++ *pcache_info = vega10_cache_info; + num_of_cache_types = ARRAY_SIZE(vega10_cache_info); + break; + case IP_VERSION(9, 2, 1): +- pcache_info = vega12_cache_info; ++ *pcache_info = vega12_cache_info; + num_of_cache_types = ARRAY_SIZE(vega12_cache_info); + break; + case IP_VERSION(9, 4, 0): + case IP_VERSION(9, 4, 1): +- pcache_info = vega20_cache_info; ++ *pcache_info = vega20_cache_info; + num_of_cache_types = ARRAY_SIZE(vega20_cache_info); + break; + case IP_VERSION(9, 4, 2): +- pcache_info = aldebaran_cache_info; ++ *pcache_info = aldebaran_cache_info; + num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); + break; + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 2, 2): +- pcache_info = raven_cache_info; ++ *pcache_info = raven_cache_info; + num_of_cache_types = ARRAY_SIZE(raven_cache_info); + break; + case IP_VERSION(9, 3, 0): +- pcache_info = renoir_cache_info; ++ *pcache_info = renoir_cache_info; + num_of_cache_types = ARRAY_SIZE(renoir_cache_info); + break; + case IP_VERSION(10, 1, 10): + case IP_VERSION(10, 1, 2): + case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): +- pcache_info = navi10_cache_info; ++ *pcache_info = navi10_cache_info; + num_of_cache_types = ARRAY_SIZE(navi10_cache_info); + break; + case IP_VERSION(10, 1, 1): +- pcache_info = navi14_cache_info; ++ *pcache_info = 
navi14_cache_info; + num_of_cache_types = ARRAY_SIZE(navi14_cache_info); + break; + case IP_VERSION(10, 3, 0): +- pcache_info = sienna_cichlid_cache_info; ++ *pcache_info = sienna_cichlid_cache_info; + num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info); + break; + case IP_VERSION(10, 3, 2): +- pcache_info = navy_flounder_cache_info; ++ *pcache_info = navy_flounder_cache_info; + num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info); + break; + case IP_VERSION(10, 3, 4): +- pcache_info = dimgrey_cavefish_cache_info; ++ *pcache_info = dimgrey_cavefish_cache_info; + num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info); + break; + case IP_VERSION(10, 3, 1): +- pcache_info = vangogh_cache_info; ++ *pcache_info = vangogh_cache_info; + num_of_cache_types = ARRAY_SIZE(vangogh_cache_info); + break; + case IP_VERSION(10, 3, 5): +- pcache_info = beige_goby_cache_info; ++ *pcache_info = beige_goby_cache_info; + num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info); + break; + case IP_VERSION(10, 3, 3): +- pcache_info = yellow_carp_cache_info; ++ *pcache_info = yellow_carp_cache_info; + num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); + break; + case IP_VERSION(10, 3, 6): +- pcache_info = gc_10_3_6_cache_info; ++ *pcache_info = gc_10_3_6_cache_info; + num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info); + break; + case IP_VERSION(10, 3, 7): +- pcache_info = gfx1037_cache_info; ++ *pcache_info = gfx1037_cache_info; + num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info); + break; + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): +- pcache_info = cache_info; + num_of_cache_types = +- kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info); ++ kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info); + break; + default: +- pcache_info = dummy_cache_info; ++ *pcache_info = dummy_cache_info; + num_of_cache_types = ARRAY_SIZE(dummy_cache_info); + pr_warn("dummy cache info is used temporarily and real cache info need update later.\n"); + break; + } + } +- +- *size_filled = 0; +- *num_of_entries = 0; +- +- /* For each type of cache listed in the kfd_gpu_cache_info table, +- * go through all available Compute Units. +- * The [i,j,k] loop will +- * if kfd_gpu_cache_info.num_cu_shared = 1 +- * will parse through all available CU +- * If (kfd_gpu_cache_info.num_cu_shared != 1) +- * then it will consider only one CU from +- * the shared unit +- */ +- +- for (ct = 0; ct < num_of_cache_types; ct++) { +- cu_processor_id = gpu_processor_id; +- if (pcache_info[ct].cache_level == 1) { +- for (i = 0; i < cu_info->num_shader_engines; i++) { +- for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { +- for (k = 0; k < cu_info->num_cu_per_sh; +- k += pcache_info[ct].num_cu_shared) { +- ret = fill_in_l1_pcache(pcache, +- pcache_info, +- cu_info, +- mem_available, +- cu_info->cu_bitmap[i % 4][j + i / 4], +- ct, +- cu_processor_id, +- k); +- +- if (ret < 0) +- break; +- +- if (!ret) { +- pcache++; +- (*num_of_entries)++; +- mem_available -= sizeof(*pcache); +- (*size_filled) += sizeof(*pcache); +- } +- +- /* Move to next CU block */ +- num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= +- cu_info->num_cu_per_sh) ? 
+- pcache_info[ct].num_cu_shared : +- (cu_info->num_cu_per_sh - k); +- cu_processor_id += num_cu_shared; +- } +- } +- } +- } else { +- ret = fill_in_l2_l3_pcache(pcache, +- pcache_info, +- cu_info, +- mem_available, +- ct, +- cu_processor_id); +- +- if (ret < 0) +- break; +- +- if (!ret) { +- pcache++; +- (*num_of_entries)++; +- mem_available -= sizeof(*pcache); +- (*size_filled) += sizeof(*pcache); +- } +- } +- } +- +- pr_debug("Added [%d] GPU cache entries\n", *num_of_entries); +- +- return 0; ++ return num_of_cache_types; + } + + static bool kfd_ignore_crat(void) +@@ -2314,8 +2091,6 @@ static int kfd_create_vcrat_image_gpu(vo + struct kfd_cu_info cu_info; + int avail_size = *size; + uint32_t total_num_of_cu; +- int num_of_cache_entries = 0; +- int cache_mem_filled = 0; + uint32_t nid = 0; + int ret = 0; + +@@ -2416,31 +2191,12 @@ static int kfd_create_vcrat_image_gpu(vo + crat_table->length += sizeof(struct crat_subtype_memory); + crat_table->total_entries++; + +- /* TODO: Fill in cache information. This information is NOT readily +- * available in KGD +- */ +- sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + +- sub_type_hdr->length); +- ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low, +- avail_size, +- &cu_info, +- (struct crat_subtype_cache *)sub_type_hdr, +- &cache_mem_filled, +- &num_of_cache_entries); +- +- if (ret < 0) +- return ret; +- +- crat_table->length += cache_mem_filled; +- crat_table->total_entries += num_of_cache_entries; +- avail_size -= cache_mem_filled; +- + /* Fill in Subtype: IO_LINKS + * Only direct links are added here which is Link from GPU to + * its NUMA node. Indirect links are added by userspace. + */ + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + +- cache_mem_filled); ++ sub_type_hdr->length); + ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev, + (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain); + +--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +@@ -317,6 +317,18 @@ struct cdit_header { + + struct kfd_dev; + ++/* Static table to describe GPU Cache information */ ++struct kfd_gpu_cache_info { ++ uint32_t cache_size; ++ uint32_t cache_level; ++ uint32_t flags; ++ /* Indicates how many Compute Units share this cache ++ * within a SA. 
Value = 1 indicates the cache is not shared ++ */ ++ uint32_t num_cu_shared; ++}; ++int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info); ++ + int kfd_create_crat_image_acpi(void **crat_image, size_t *size); + void kfd_destroy_crat_image(void *crat_image); + int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, +--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +@@ -364,7 +364,6 @@ static ssize_t kfd_cache_show(struct kob + + /* Making sure that the buffer is an empty string */ + buffer[0] = 0; +- + cache = container_of(attr, struct kfd_cache_properties, attr); + if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu)) + return -EPERM; +@@ -379,12 +378,13 @@ static ssize_t kfd_cache_show(struct kob + sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc); + sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency); + sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type); ++ + offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map "); +- for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++) ++ for (i = 0; i < cache->sibling_map_size; i++) + for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) + /* Check each bit */ + offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,", +- (cache->sibling_map[i] >> j) & 1); ++ (cache->sibling_map[i] >> j) & 1); + + /* Replace the last "," with end of line */ + buffer[offs-1] = '\n'; +@@ -1198,7 +1198,6 @@ static struct kfd_topology_device *kfd_a + struct kfd_iolink_properties *iolink; + struct kfd_iolink_properties *p2plink; + +- down_write(&topology_lock); + list_for_each_entry(dev, &topology_device_list, list) { + /* Discrete GPUs need their own topology device list + * entries. Don't assign them to CPU/APU nodes. +@@ -1222,7 +1221,6 @@ static struct kfd_topology_device *kfd_a + break; + } + } +- up_write(&topology_lock); + return out_dev; + } + +@@ -1593,6 +1591,221 @@ out: + return ret; + } + ++ ++/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ ++static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext, ++ struct kfd_gpu_cache_info *pcache_info, ++ struct kfd_cu_info *cu_info, ++ int cu_bitmask, ++ int cache_type, unsigned int cu_processor_id, ++ int cu_block) ++{ ++ unsigned int cu_sibling_map_mask; ++ int first_active_cu; ++ struct kfd_cache_properties *pcache = NULL; ++ ++ cu_sibling_map_mask = cu_bitmask; ++ cu_sibling_map_mask >>= cu_block; ++ cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); ++ first_active_cu = ffs(cu_sibling_map_mask); ++ ++ /* CU could be inactive. In case of shared cache find the first active ++ * CU. and incase of non-shared cache check if the CU is inactive. 
If ++ * inactive active skip it ++ */ ++ if (first_active_cu) { ++ pcache = kfd_alloc_struct(pcache); ++ if (!pcache) ++ return -ENOMEM; ++ ++ memset(pcache, 0, sizeof(struct kfd_cache_properties)); ++ pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); ++ pcache->cache_level = pcache_info[cache_type].cache_level; ++ pcache->cache_size = pcache_info[cache_type].cache_size; ++ ++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) ++ pcache->cache_type |= HSA_CACHE_TYPE_DATA; ++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE) ++ pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; ++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE) ++ pcache->cache_type |= HSA_CACHE_TYPE_CPU; ++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE) ++ pcache->cache_type |= HSA_CACHE_TYPE_HSACU; ++ ++ /* Sibling map is w.r.t processor_id_low, so shift out ++ * inactive CU ++ */ ++ cu_sibling_map_mask = ++ cu_sibling_map_mask >> (first_active_cu - 1); ++ ++ pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); ++ pcache->sibling_map[1] = ++ (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); ++ pcache->sibling_map[2] = ++ (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); ++ pcache->sibling_map[3] = ++ (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); ++ ++ pcache->sibling_map_size = 4; ++ *props_ext = pcache; ++ ++ return 0; ++ } ++ return 1; ++} ++ ++/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ ++static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, ++ struct kfd_gpu_cache_info *pcache_info, ++ struct kfd_cu_info *cu_info, ++ int cache_type, unsigned int cu_processor_id) ++{ ++ unsigned int cu_sibling_map_mask; ++ int first_active_cu; ++ int i, j, k; ++ struct kfd_cache_properties *pcache = NULL; ++ ++ cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; ++ cu_sibling_map_mask &= ++ ((1 << pcache_info[cache_type].num_cu_shared) - 1); ++ first_active_cu = ffs(cu_sibling_map_mask); ++ ++ /* CU could be inactive. In case of shared cache find the first active ++ * CU. and incase of non-shared cache check if the CU is inactive. 
If ++ * inactive active skip it ++ */ ++ if (first_active_cu) { ++ pcache = kfd_alloc_struct(pcache); ++ if (!pcache) ++ return -ENOMEM; ++ ++ memset(pcache, 0, sizeof(struct kfd_cache_properties)); ++ pcache->processor_id_low = cu_processor_id ++ + (first_active_cu - 1); ++ pcache->cache_level = pcache_info[cache_type].cache_level; ++ pcache->cache_size = pcache_info[cache_type].cache_size; ++ ++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) ++ pcache->cache_type |= HSA_CACHE_TYPE_DATA; ++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE) ++ pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; ++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE) ++ pcache->cache_type |= HSA_CACHE_TYPE_CPU; ++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE) ++ pcache->cache_type |= HSA_CACHE_TYPE_HSACU; ++ ++ /* Sibling map is w.r.t processor_id_low, so shift out ++ * inactive CU ++ */ ++ cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); ++ k = 0; ++ ++ for (i = 0; i < cu_info->num_shader_engines; i++) { ++ for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { ++ pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); ++ pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); ++ pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); ++ pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); ++ k += 4; ++ ++ cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4]; ++ cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); ++ } ++ } ++ pcache->sibling_map_size = k; ++ *props_ext = pcache; ++ return 0; ++ } ++ return 1; ++} ++ ++#define KFD_MAX_CACHE_TYPES 6 ++ ++/* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info ++ * tables ++ */ ++void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_dev *kdev) ++{ ++ struct kfd_gpu_cache_info *pcache_info = NULL; ++ int i, j, k; ++ int ct = 0; ++ unsigned int cu_processor_id; ++ int ret; ++ unsigned int num_cu_shared; ++ struct kfd_cu_info cu_info; ++ struct kfd_cu_info *pcu_info; ++ int gpu_processor_id; ++ struct kfd_cache_properties *props_ext; ++ int num_of_entries = 0; ++ int num_of_cache_types = 0; ++ struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; ++ ++ amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info); ++ pcu_info = &cu_info; ++ ++ gpu_processor_id = dev->node_props.simd_id_base; ++ ++ pcache_info = cache_info; ++ num_of_cache_types = kfd_get_gpu_cache_info(kdev, &pcache_info); ++ if (!num_of_cache_types) { ++ pr_warn("no cache info found\n"); ++ return; ++ } ++ ++ /* For each type of cache listed in the kfd_gpu_cache_info table, ++ * go through all available Compute Units. 
++ * The [i,j,k] loop will ++ * if kfd_gpu_cache_info.num_cu_shared = 1 ++ * will parse through all available CU ++ * If (kfd_gpu_cache_info.num_cu_shared != 1) ++ * then it will consider only one CU from ++ * the shared unit ++ */ ++ for (ct = 0; ct < num_of_cache_types; ct++) { ++ cu_processor_id = gpu_processor_id; ++ if (pcache_info[ct].cache_level == 1) { ++ for (i = 0; i < pcu_info->num_shader_engines; i++) { ++ for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) { ++ for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { ++ ++ ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, ++ pcu_info->cu_bitmap[i % 4][j + i / 4], ct, ++ cu_processor_id, k); ++ ++ if (ret < 0) ++ break; ++ ++ if (!ret) { ++ num_of_entries++; ++ list_add_tail(&props_ext->list, &dev->cache_props); ++ } ++ ++ /* Move to next CU block */ ++ num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= ++ pcu_info->num_cu_per_sh) ? ++ pcache_info[ct].num_cu_shared : ++ (pcu_info->num_cu_per_sh - k); ++ cu_processor_id += num_cu_shared; ++ } ++ } ++ } ++ } else { ++ ret = fill_in_l2_l3_pcache(&props_ext, pcache_info, ++ pcu_info, ct, cu_processor_id); ++ ++ if (ret < 0) ++ break; ++ ++ if (!ret) { ++ num_of_entries++; ++ list_add_tail(&props_ext->list, &dev->cache_props); ++ } ++ } ++ } ++ dev->node_props.caches_count += num_of_entries; ++ pr_debug("Added [%d] GPU cache entries\n", num_of_entries); ++} ++ + int kfd_topology_add_device(struct kfd_dev *gpu) + { + uint32_t gpu_id; +@@ -1617,9 +1830,9 @@ int kfd_topology_add_device(struct kfd_d + * CRAT to create a new topology device. Once created assign the gpu to + * that topology device + */ ++ down_write(&topology_lock); + dev = kfd_assign_gpu(gpu); + if (!dev) { +- down_write(&topology_lock); + proximity_domain = ++topology_crat_proximity_domain; + + res = kfd_create_crat_image_virtual(&crat_image, &image_size, +@@ -1631,6 +1844,7 @@ int kfd_topology_add_device(struct kfd_d + topology_crat_proximity_domain--; + return res; + } ++ + res = kfd_parse_crat_table(crat_image, + &temp_topology_device_list, + proximity_domain); +@@ -1644,23 +1858,28 @@ int kfd_topology_add_device(struct kfd_d + kfd_topology_update_device_list(&temp_topology_device_list, + &topology_device_list); + ++ dev = kfd_assign_gpu(gpu); ++ if (WARN_ON(!dev)) { ++ res = -ENODEV; ++ goto err; ++ } ++ ++ /* Fill the cache affinity information here for the GPUs ++ * using VCRAT ++ */ ++ kfd_fill_cache_non_crat_info(dev, gpu); ++ + /* Update the SYSFS tree, since we added another topology + * device + */ + res = kfd_topology_update_sysfs(); +- up_write(&topology_lock); +- + if (!res) + sys_props.generation_count++; + else + pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. 
res=%d\n", + gpu_id, res); +- dev = kfd_assign_gpu(gpu); +- if (WARN_ON(!dev)) { +- res = -ENODEV; +- goto err; +- } + } ++ up_write(&topology_lock); + + dev->gpu_id = gpu_id; + gpu->id = gpu_id; +--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +@@ -80,6 +80,8 @@ struct kfd_mem_properties { + struct attribute attr; + }; + ++#define CACHE_SIBLINGMAP_SIZE 64 ++ + struct kfd_cache_properties { + struct list_head list; + uint32_t processor_id_low; +@@ -90,10 +92,11 @@ struct kfd_cache_properties { + uint32_t cache_assoc; + uint32_t cache_latency; + uint32_t cache_type; +- uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE]; ++ uint8_t sibling_map[CACHE_SIBLINGMAP_SIZE]; + struct kfd_dev *gpu; + struct kobject *kobj; + struct attribute attr; ++ uint32_t sibling_map_size; + }; + + struct kfd_iolink_properties { diff --git a/queue-6.1/drm-amdkfd-introduce-dummy-cache-info-for-property-asic.patch b/queue-6.1/drm-amdkfd-introduce-dummy-cache-info-for-property-asic.patch new file mode 100644 index 00000000000..bb9fc4ce16f --- /dev/null +++ b/queue-6.1/drm-amdkfd-introduce-dummy-cache-info-for-property-asic.patch @@ -0,0 +1,89 @@ +From fd72e2cb2f9dd2734e8013b3e185a21f0d605d3e Mon Sep 17 00:00:00 2001 +From: Prike Liang +Date: Fri, 21 Oct 2022 16:38:48 -0400 +Subject: drm/amdkfd: introduce dummy cache info for property asic + +From: Prike Liang + +commit fd72e2cb2f9dd2734e8013b3e185a21f0d605d3e upstream. + +This dummy cache info will enable kfd base function support. + +Signed-off-by: Prike Liang +Reviewed-by: Alex Deucher +Signed-off-by: Alex Deucher +Cc: "Limonciello, Mario" +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 53 +++++++++++++++++++++++++++++++++- + 1 file changed, 52 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +@@ -891,6 +891,54 @@ static struct kfd_gpu_cache_info gc_10_3 + }, + }; + ++static struct kfd_gpu_cache_info dummy_cache_info[] = { ++ { ++ /* TCP L1 Cache per CU */ ++ .cache_size = 16, ++ .cache_level = 1, ++ .flags = (CRAT_CACHE_FLAGS_ENABLED | ++ CRAT_CACHE_FLAGS_DATA_CACHE | ++ CRAT_CACHE_FLAGS_SIMD_CACHE), ++ .num_cu_shared = 1, ++ }, ++ { ++ /* Scalar L1 Instruction Cache per SQC */ ++ .cache_size = 32, ++ .cache_level = 1, ++ .flags = (CRAT_CACHE_FLAGS_ENABLED | ++ CRAT_CACHE_FLAGS_INST_CACHE | ++ CRAT_CACHE_FLAGS_SIMD_CACHE), ++ .num_cu_shared = 2, ++ }, ++ { ++ /* Scalar L1 Data Cache per SQC */ ++ .cache_size = 16, ++ .cache_level = 1, ++ .flags = (CRAT_CACHE_FLAGS_ENABLED | ++ CRAT_CACHE_FLAGS_DATA_CACHE | ++ CRAT_CACHE_FLAGS_SIMD_CACHE), ++ .num_cu_shared = 2, ++ }, ++ { ++ /* GL1 Data Cache per SA */ ++ .cache_size = 128, ++ .cache_level = 1, ++ .flags = (CRAT_CACHE_FLAGS_ENABLED | ++ CRAT_CACHE_FLAGS_DATA_CACHE | ++ CRAT_CACHE_FLAGS_SIMD_CACHE), ++ .num_cu_shared = 6, ++ }, ++ { ++ /* L2 Data Cache per GPU (Total Tex Cache) */ ++ .cache_size = 2048, ++ .cache_level = 2, ++ .flags = (CRAT_CACHE_FLAGS_ENABLED | ++ CRAT_CACHE_FLAGS_DATA_CACHE | ++ CRAT_CACHE_FLAGS_SIMD_CACHE), ++ .num_cu_shared = 6, ++ }, ++}; ++ + static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, + struct crat_subtype_computeunit *cu) + { +@@ -1630,7 +1678,10 @@ static int kfd_fill_gpu_cache_info(struc + kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info); + break; + default: +- return -EINVAL; ++ pcache_info = dummy_cache_info; ++ num_of_cache_types = ARRAY_SIZE(dummy_cache_info); ++ pr_warn("dummy cache info is used 
temporarily and real cache info need update later.\n"); ++ break; + } + } + diff --git a/queue-6.1/series b/queue-6.1/series index 1e002bb317f..744be745ff4 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -218,3 +218,6 @@ dm-crypt-add-cond_resched-to-dmcrypt_write.patch dm-crypt-avoid-accessing-uninitialized-tasklet.patch sched-fair-sanitize-vruntime-of-entity-being-placed.patch sched-fair-sanitize-vruntime-of-entity-being-migrated.patch +drm-amdkfd-introduce-dummy-cache-info-for-property-asic.patch +drm-amdkfd-fix-the-warning-of-array-index-out-of-bounds.patch +drm-amdkfd-add-gc-11.0.4-kfd-support.patch
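
A note on the sibling-map packing used in the array-index-out-of-bounds fix above: each cache entry records which compute units share it as a bitmask, shifted so that bit 0 corresponds to the first active CU, then stored byte by byte. The L2/L3 path writes 4 bytes per (shader engine, shader array) pair, which is why a fixed sibling_map[32] could overflow on GPUs with more CUs, and why the patch widens the array to CACHE_SIBLINGMAP_SIZE (64) and records the bytes actually used in sibling_map_size. The standalone C sketch below mirrors the mask/shift steps visible in fill_in_l1_pcache(); the helper name, struct, and test bitmap are illustrative only, not part of the kernel API.

#include <stdint.h>
#include <stdio.h>
#include <strings.h>	/* ffs() */

/* Illustrative stand-in for one kfd_gpu_cache_info entry. */
struct cache_desc {
	unsigned int num_cu_shared;	/* CUs sharing this cache within a SA */
};

/*
 * Mirror the mask/shift logic of fill_in_l1_pcache(): pick the CU block,
 * keep only the CUs that can share this cache, locate the first active
 * CU, then store the mask relative to it, one byte at a time.
 * Returns 0 if a map was written, 1 if every CU in the block is inactive.
 */
static int pack_l1_sibling_map(uint32_t cu_bitmap, int cu_block,
			       const struct cache_desc *desc,
			       uint8_t sibling_map[4])
{
	uint32_t mask = cu_bitmap >> cu_block;
	int first_active_cu;

	mask &= (1u << desc->num_cu_shared) - 1;
	first_active_cu = ffs(mask);
	if (!first_active_cu)
		return 1;	/* inactive block: no cache entry emitted */

	/* Sibling map is w.r.t. processor_id_low, so shift out inactive CUs. */
	mask >>= first_active_cu - 1;
	sibling_map[0] = (uint8_t)(mask & 0xFF);
	sibling_map[1] = (uint8_t)((mask >> 8) & 0xFF);
	sibling_map[2] = (uint8_t)((mask >> 16) & 0xFF);
	sibling_map[3] = (uint8_t)((mask >> 24) & 0xFF);
	return 0;
}

int main(void)
{
	/* Hypothetical bitmap: CUs 1-3 active in a block of 4 shared CUs. */
	const struct cache_desc l1 = { .num_cu_shared = 4 };
	uint8_t map[4] = { 0 };

	if (pack_l1_sibling_map(0x0000000eu, 0, &l1, map) == 0)
		printf("sibling_map[0] = 0x%02x\n", map[0]);	/* prints 0x07 */
	return 0;
}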