--- /dev/null
+From c0cc999f3c32e65a7c88fb323893ddf897b24488 Mon Sep 17 00:00:00 2001
+From: Ma Jun <Jun.Ma2@amd.com>
+Date: Wed, 2 Nov 2022 15:53:26 +0800
+Subject: drm/amdkfd: Fix the warning of array-index-out-of-bounds
+
+From: Ma Jun <Jun.Ma2@amd.com>
+
+commit c0cc999f3c32e65a7c88fb323893ddf897b24488 upstream.
+
+For some GPUs with more CUs, the original sibling_map[32]
+in struct crat_subtype_cache is not large enough to hold
+the cache information when creating the VCRAT table. Fix
+this by skipping the struct crat_subtype_cache info and
+instead filling struct kfd_cache_properties directly.
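+
+A minimal sketch of the overflow follows. The loop shape comes from the
+old fill_in_l2_l3_pcache() below; the 8-shader-engine, 2-shader-array
+configuration is a hypothetical example, not a specific product:
+
+	/* The old L2/L3 fill wrote 4 sibling_map bytes per shader
+	 * array, so engines * arrays > 8 overruns sibling_map[32].
+	 */
+	int k = 0;
+	for (int i = 0; i < 8; i++)		/* num_shader_engines */
+		for (int j = 0; j < 2; j++)	/* num_shader_arrays_per_engine */
+			k += 4;			/* fills sibling_map[k..k+3] */
+	/* k ends at 64, past the end of the old 32-byte sibling_map */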
+
+Signed-off-by: Ma Jun <Jun.Ma2@amd.com>
+Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: "Limonciello, Mario" <Mario.Limonciello@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 312 +++---------------------------
+ drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 12 +
+ drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 245 ++++++++++++++++++++++-
+ drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 5
+ 4 files changed, 282 insertions(+), 292 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+@@ -50,16 +50,6 @@ static inline unsigned int get_and_inc_g
+ return current_id;
+ }
+
+-/* Static table to describe GPU Cache information */
+-struct kfd_gpu_cache_info {
+- uint32_t cache_size;
+- uint32_t cache_level;
+- uint32_t flags;
+- /* Indicates how many Compute Units share this cache
+- * within a SA. Value = 1 indicates the cache is not shared
+- */
+- uint32_t num_cu_shared;
+-};
+
+ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
+ {
+@@ -1119,9 +1109,13 @@ static int kfd_parse_subtype_cache(struc
+ props->cachelines_per_tag = cache->lines_per_tag;
+ props->cache_assoc = cache->associativity;
+ props->cache_latency = cache->cache_latency;
++
+ memcpy(props->sibling_map, cache->sibling_map,
+ sizeof(props->sibling_map));
+
++	/* Set the sibling_map_size to 32 for CRAT tables from ACPI */
++ props->sibling_map_size = CRAT_SIBLINGMAP_SIZE;
++
+ if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
+ props->cache_type |= HSA_CACHE_TYPE_DATA;
+ if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
+@@ -1339,125 +1333,6 @@ err:
+ return ret;
+ }
+
+-/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
+-static int fill_in_l1_pcache(struct crat_subtype_cache *pcache,
+- struct kfd_gpu_cache_info *pcache_info,
+- struct kfd_cu_info *cu_info,
+- int mem_available,
+- int cu_bitmask,
+- int cache_type, unsigned int cu_processor_id,
+- int cu_block)
+-{
+- unsigned int cu_sibling_map_mask;
+- int first_active_cu;
+-
+- /* First check if enough memory is available */
+- if (sizeof(struct crat_subtype_cache) > mem_available)
+- return -ENOMEM;
+-
+- cu_sibling_map_mask = cu_bitmask;
+- cu_sibling_map_mask >>= cu_block;
+- cu_sibling_map_mask &=
+- ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+- first_active_cu = ffs(cu_sibling_map_mask);
+-
+- /* CU could be inactive. In case of shared cache find the first active
+- * CU. and incase of non-shared cache check if the CU is inactive. If
+- * inactive active skip it
+- */
+- if (first_active_cu) {
+- memset(pcache, 0, sizeof(struct crat_subtype_cache));
+- pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
+- pcache->length = sizeof(struct crat_subtype_cache);
+- pcache->flags = pcache_info[cache_type].flags;
+- pcache->processor_id_low = cu_processor_id
+- + (first_active_cu - 1);
+- pcache->cache_level = pcache_info[cache_type].cache_level;
+- pcache->cache_size = pcache_info[cache_type].cache_size;
+-
+- /* Sibling map is w.r.t processor_id_low, so shift out
+- * inactive CU
+- */
+- cu_sibling_map_mask =
+- cu_sibling_map_mask >> (first_active_cu - 1);
+-
+- pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
+- pcache->sibling_map[1] =
+- (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+- pcache->sibling_map[2] =
+- (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+- pcache->sibling_map[3] =
+- (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+- return 0;
+- }
+- return 1;
+-}
+-
+-/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
+-static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
+- struct kfd_gpu_cache_info *pcache_info,
+- struct kfd_cu_info *cu_info,
+- int mem_available,
+- int cache_type, unsigned int cu_processor_id)
+-{
+- unsigned int cu_sibling_map_mask;
+- int first_active_cu;
+- int i, j, k;
+-
+- /* First check if enough memory is available */
+- if (sizeof(struct crat_subtype_cache) > mem_available)
+- return -ENOMEM;
+-
+- cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
+- cu_sibling_map_mask &=
+- ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+- first_active_cu = ffs(cu_sibling_map_mask);
+-
+- /* CU could be inactive. In case of shared cache find the first active
+- * CU. and incase of non-shared cache check if the CU is inactive. If
+- * inactive active skip it
+- */
+- if (first_active_cu) {
+- memset(pcache, 0, sizeof(struct crat_subtype_cache));
+- pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
+- pcache->length = sizeof(struct crat_subtype_cache);
+- pcache->flags = pcache_info[cache_type].flags;
+- pcache->processor_id_low = cu_processor_id
+- + (first_active_cu - 1);
+- pcache->cache_level = pcache_info[cache_type].cache_level;
+- pcache->cache_size = pcache_info[cache_type].cache_size;
+-
+- /* Sibling map is w.r.t processor_id_low, so shift out
+- * inactive CU
+- */
+- cu_sibling_map_mask =
+- cu_sibling_map_mask >> (first_active_cu - 1);
+- k = 0;
+- for (i = 0; i < cu_info->num_shader_engines; i++) {
+- for (j = 0; j < cu_info->num_shader_arrays_per_engine;
+- j++) {
+- pcache->sibling_map[k] =
+- (uint8_t)(cu_sibling_map_mask & 0xFF);
+- pcache->sibling_map[k+1] =
+- (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+- pcache->sibling_map[k+2] =
+- (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+- pcache->sibling_map[k+3] =
+- (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+- k += 4;
+- cu_sibling_map_mask =
+- cu_info->cu_bitmap[i % 4][j + i / 4];
+- cu_sibling_map_mask &= (
+- (1 << pcache_info[cache_type].num_cu_shared)
+- - 1);
+- }
+- }
+- return 0;
+- }
+- return 1;
+-}
+-
+-#define KFD_MAX_CACHE_TYPES 6
+
+ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
+ struct kfd_gpu_cache_info *pcache_info)
+@@ -1531,231 +1406,133 @@ static int kfd_fill_gpu_cache_info_from_
+ return i;
+ }
+
+-/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
+- * tables
+- *
+- * @kdev - [IN] GPU device
+- * @gpu_processor_id - [IN] GPU processor ID to which these caches
+- * associate
+- * @available_size - [IN] Amount of memory available in pcache
+- * @cu_info - [IN] Compute Unit info obtained from KGD
+- * @pcache - [OUT] memory into which cache data is to be filled in.
+- * @size_filled - [OUT] amount of data used up in pcache.
+- * @num_of_entries - [OUT] number of caches added
+- */
+-static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
+- int gpu_processor_id,
+- int available_size,
+- struct kfd_cu_info *cu_info,
+- struct crat_subtype_cache *pcache,
+- int *size_filled,
+- int *num_of_entries)
++int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info)
+ {
+- struct kfd_gpu_cache_info *pcache_info;
+- struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
+ int num_of_cache_types = 0;
+- int i, j, k;
+- int ct = 0;
+- int mem_available = available_size;
+- unsigned int cu_processor_id;
+- int ret;
+- unsigned int num_cu_shared;
+
+ switch (kdev->adev->asic_type) {
+ case CHIP_KAVERI:
+- pcache_info = kaveri_cache_info;
++ *pcache_info = kaveri_cache_info;
+ num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
+ break;
+ case CHIP_HAWAII:
+- pcache_info = hawaii_cache_info;
++ *pcache_info = hawaii_cache_info;
+ num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
+ break;
+ case CHIP_CARRIZO:
+- pcache_info = carrizo_cache_info;
++ *pcache_info = carrizo_cache_info;
+ num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
+ break;
+ case CHIP_TONGA:
+- pcache_info = tonga_cache_info;
++ *pcache_info = tonga_cache_info;
+ num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
+ break;
+ case CHIP_FIJI:
+- pcache_info = fiji_cache_info;
++ *pcache_info = fiji_cache_info;
+ num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
+ break;
+ case CHIP_POLARIS10:
+- pcache_info = polaris10_cache_info;
++ *pcache_info = polaris10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
+ break;
+ case CHIP_POLARIS11:
+- pcache_info = polaris11_cache_info;
++ *pcache_info = polaris11_cache_info;
+ num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
+ break;
+ case CHIP_POLARIS12:
+- pcache_info = polaris12_cache_info;
++ *pcache_info = polaris12_cache_info;
+ num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
+ break;
+ case CHIP_VEGAM:
+- pcache_info = vegam_cache_info;
++ *pcache_info = vegam_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
+ break;
+ default:
+ switch (KFD_GC_VERSION(kdev)) {
+ case IP_VERSION(9, 0, 1):
+- pcache_info = vega10_cache_info;
++ *pcache_info = vega10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+ break;
+ case IP_VERSION(9, 2, 1):
+- pcache_info = vega12_cache_info;
++ *pcache_info = vega12_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
+ break;
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+- pcache_info = vega20_cache_info;
++ *pcache_info = vega20_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
+ break;
+ case IP_VERSION(9, 4, 2):
+- pcache_info = aldebaran_cache_info;
++ *pcache_info = aldebaran_cache_info;
+ num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
+ break;
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+- pcache_info = raven_cache_info;
++ *pcache_info = raven_cache_info;
+ num_of_cache_types = ARRAY_SIZE(raven_cache_info);
+ break;
+ case IP_VERSION(9, 3, 0):
+- pcache_info = renoir_cache_info;
++ *pcache_info = renoir_cache_info;
+ num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+- pcache_info = navi10_cache_info;
++ *pcache_info = navi10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
+ break;
+ case IP_VERSION(10, 1, 1):
+- pcache_info = navi14_cache_info;
++ *pcache_info = navi14_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
+ break;
+ case IP_VERSION(10, 3, 0):
+- pcache_info = sienna_cichlid_cache_info;
++ *pcache_info = sienna_cichlid_cache_info;
+ num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
+ break;
+ case IP_VERSION(10, 3, 2):
+- pcache_info = navy_flounder_cache_info;
++ *pcache_info = navy_flounder_cache_info;
+ num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
+ break;
+ case IP_VERSION(10, 3, 4):
+- pcache_info = dimgrey_cavefish_cache_info;
++ *pcache_info = dimgrey_cavefish_cache_info;
+ num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
+ break;
+ case IP_VERSION(10, 3, 1):
+- pcache_info = vangogh_cache_info;
++ *pcache_info = vangogh_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
+ break;
+ case IP_VERSION(10, 3, 5):
+- pcache_info = beige_goby_cache_info;
++ *pcache_info = beige_goby_cache_info;
+ num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
+ break;
+ case IP_VERSION(10, 3, 3):
+- pcache_info = yellow_carp_cache_info;
++ *pcache_info = yellow_carp_cache_info;
+ num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
+ break;
+ case IP_VERSION(10, 3, 6):
+- pcache_info = gc_10_3_6_cache_info;
++ *pcache_info = gc_10_3_6_cache_info;
+ num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
+ break;
+ case IP_VERSION(10, 3, 7):
+- pcache_info = gfx1037_cache_info;
++ *pcache_info = gfx1037_cache_info;
+ num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+- pcache_info = cache_info;
+ num_of_cache_types =
+- kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info);
++ kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info);
+ break;
+ default:
+- pcache_info = dummy_cache_info;
++ *pcache_info = dummy_cache_info;
+ num_of_cache_types = ARRAY_SIZE(dummy_cache_info);
+ pr_warn("dummy cache info is used temporarily and real cache info need update later.\n");
+ break;
+ }
+ }
+-
+- *size_filled = 0;
+- *num_of_entries = 0;
+-
+- /* For each type of cache listed in the kfd_gpu_cache_info table,
+- * go through all available Compute Units.
+- * The [i,j,k] loop will
+- * if kfd_gpu_cache_info.num_cu_shared = 1
+- * will parse through all available CU
+- * If (kfd_gpu_cache_info.num_cu_shared != 1)
+- * then it will consider only one CU from
+- * the shared unit
+- */
+-
+- for (ct = 0; ct < num_of_cache_types; ct++) {
+- cu_processor_id = gpu_processor_id;
+- if (pcache_info[ct].cache_level == 1) {
+- for (i = 0; i < cu_info->num_shader_engines; i++) {
+- for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
+- for (k = 0; k < cu_info->num_cu_per_sh;
+- k += pcache_info[ct].num_cu_shared) {
+- ret = fill_in_l1_pcache(pcache,
+- pcache_info,
+- cu_info,
+- mem_available,
+- cu_info->cu_bitmap[i % 4][j + i / 4],
+- ct,
+- cu_processor_id,
+- k);
+-
+- if (ret < 0)
+- break;
+-
+- if (!ret) {
+- pcache++;
+- (*num_of_entries)++;
+- mem_available -= sizeof(*pcache);
+- (*size_filled) += sizeof(*pcache);
+- }
+-
+- /* Move to next CU block */
+- num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
+- cu_info->num_cu_per_sh) ?
+- pcache_info[ct].num_cu_shared :
+- (cu_info->num_cu_per_sh - k);
+- cu_processor_id += num_cu_shared;
+- }
+- }
+- }
+- } else {
+- ret = fill_in_l2_l3_pcache(pcache,
+- pcache_info,
+- cu_info,
+- mem_available,
+- ct,
+- cu_processor_id);
+-
+- if (ret < 0)
+- break;
+-
+- if (!ret) {
+- pcache++;
+- (*num_of_entries)++;
+- mem_available -= sizeof(*pcache);
+- (*size_filled) += sizeof(*pcache);
+- }
+- }
+- }
+-
+- pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
+-
+- return 0;
++ return num_of_cache_types;
+ }
+
+ static bool kfd_ignore_crat(void)
+@@ -2314,8 +2091,6 @@ static int kfd_create_vcrat_image_gpu(vo
+ struct kfd_cu_info cu_info;
+ int avail_size = *size;
+ uint32_t total_num_of_cu;
+- int num_of_cache_entries = 0;
+- int cache_mem_filled = 0;
+ uint32_t nid = 0;
+ int ret = 0;
+
+@@ -2416,31 +2191,12 @@ static int kfd_create_vcrat_image_gpu(vo
+ crat_table->length += sizeof(struct crat_subtype_memory);
+ crat_table->total_entries++;
+
+- /* TODO: Fill in cache information. This information is NOT readily
+- * available in KGD
+- */
+- sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+- sub_type_hdr->length);
+- ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low,
+- avail_size,
+- &cu_info,
+- (struct crat_subtype_cache *)sub_type_hdr,
+- &cache_mem_filled,
+- &num_of_cache_entries);
+-
+- if (ret < 0)
+- return ret;
+-
+- crat_table->length += cache_mem_filled;
+- crat_table->total_entries += num_of_cache_entries;
+- avail_size -= cache_mem_filled;
+-
+ /* Fill in Subtype: IO_LINKS
+ * Only direct links are added here which is Link from GPU to
+ * its NUMA node. Indirect links are added by userspace.
+ */
+ sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+- cache_mem_filled);
++ sub_type_hdr->length);
+ ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
+ (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
+
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+@@ -317,6 +317,18 @@ struct cdit_header {
+
+ struct kfd_dev;
+
++/* Static table to describe GPU Cache information */
++struct kfd_gpu_cache_info {
++ uint32_t cache_size;
++ uint32_t cache_level;
++ uint32_t flags;
++ /* Indicates how many Compute Units share this cache
++ * within a SA. Value = 1 indicates the cache is not shared
++ */
++ uint32_t num_cu_shared;
++};
++int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info);
++
+ int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
+ void kfd_destroy_crat_image(void *crat_image);
+ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+@@ -364,7 +364,6 @@ static ssize_t kfd_cache_show(struct kob
+
+ /* Making sure that the buffer is an empty string */
+ buffer[0] = 0;
+-
+ cache = container_of(attr, struct kfd_cache_properties, attr);
+ if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
+ return -EPERM;
+@@ -379,12 +378,13 @@ static ssize_t kfd_cache_show(struct kob
+ sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
+ sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
+ sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);
++
+ offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
+- for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
++ for (i = 0; i < cache->sibling_map_size; i++)
+ for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
+ /* Check each bit */
+ offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
+- (cache->sibling_map[i] >> j) & 1);
++ (cache->sibling_map[i] >> j) & 1);
+
+ /* Replace the last "," with end of line */
+ buffer[offs-1] = '\n';
+@@ -1198,7 +1198,6 @@ static struct kfd_topology_device *kfd_a
+ struct kfd_iolink_properties *iolink;
+ struct kfd_iolink_properties *p2plink;
+
+- down_write(&topology_lock);
+ list_for_each_entry(dev, &topology_device_list, list) {
+ /* Discrete GPUs need their own topology device list
+ * entries. Don't assign them to CPU/APU nodes.
+@@ -1222,7 +1221,6 @@ static struct kfd_topology_device *kfd_a
+ break;
+ }
+ }
+- up_write(&topology_lock);
+ return out_dev;
+ }
+
+@@ -1593,6 +1591,221 @@ out:
+ return ret;
+ }
+
++
++/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
++static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
++ struct kfd_gpu_cache_info *pcache_info,
++ struct kfd_cu_info *cu_info,
++ int cu_bitmask,
++ int cache_type, unsigned int cu_processor_id,
++ int cu_block)
++{
++ unsigned int cu_sibling_map_mask;
++ int first_active_cu;
++ struct kfd_cache_properties *pcache = NULL;
++
++ cu_sibling_map_mask = cu_bitmask;
++ cu_sibling_map_mask >>= cu_block;
++ cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
++ first_active_cu = ffs(cu_sibling_map_mask);
++
++	/* A CU could be inactive. In case of a shared cache, find the
++	 * first active CU; in case of a non-shared cache, check whether
++	 * the CU is inactive and, if so, skip it.
++	 */
++ if (first_active_cu) {
++ pcache = kfd_alloc_struct(pcache);
++ if (!pcache)
++ return -ENOMEM;
++
++ memset(pcache, 0, sizeof(struct kfd_cache_properties));
++ pcache->processor_id_low = cu_processor_id + (first_active_cu - 1);
++ pcache->cache_level = pcache_info[cache_type].cache_level;
++ pcache->cache_size = pcache_info[cache_type].cache_size;
++
++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
++ pcache->cache_type |= HSA_CACHE_TYPE_DATA;
++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)
++ pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)
++ pcache->cache_type |= HSA_CACHE_TYPE_CPU;
++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
++ pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
++
++	/* Sibling map is w.r.t processor_id_low, so shift out
++	 * inactive CUs
++	 */
++ cu_sibling_map_mask =
++ cu_sibling_map_mask >> (first_active_cu - 1);
++
++ pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
++ pcache->sibling_map[1] =
++ (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
++ pcache->sibling_map[2] =
++ (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
++ pcache->sibling_map[3] =
++ (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
++
++ pcache->sibling_map_size = 4;
++ *props_ext = pcache;
++
++ return 0;
++ }
++ return 1;
++}
++
++/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
++static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
++ struct kfd_gpu_cache_info *pcache_info,
++ struct kfd_cu_info *cu_info,
++ int cache_type, unsigned int cu_processor_id)
++{
++ unsigned int cu_sibling_map_mask;
++ int first_active_cu;
++ int i, j, k;
++ struct kfd_cache_properties *pcache = NULL;
++
++ cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
++ cu_sibling_map_mask &=
++ ((1 << pcache_info[cache_type].num_cu_shared) - 1);
++ first_active_cu = ffs(cu_sibling_map_mask);
++
++	/* A CU could be inactive. In case of a shared cache, find the
++	 * first active CU; in case of a non-shared cache, check whether
++	 * the CU is inactive and, if so, skip it.
++	 */
++ if (first_active_cu) {
++ pcache = kfd_alloc_struct(pcache);
++ if (!pcache)
++ return -ENOMEM;
++
++ memset(pcache, 0, sizeof(struct kfd_cache_properties));
++ pcache->processor_id_low = cu_processor_id
++ + (first_active_cu - 1);
++ pcache->cache_level = pcache_info[cache_type].cache_level;
++ pcache->cache_size = pcache_info[cache_type].cache_size;
++
++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
++ pcache->cache_type |= HSA_CACHE_TYPE_DATA;
++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)
++ pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)
++ pcache->cache_type |= HSA_CACHE_TYPE_CPU;
++ if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
++ pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
++
++	/* Sibling map is w.r.t processor_id_low, so shift out
++	 * inactive CUs
++	 */
++ cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
++ k = 0;
++
++ for (i = 0; i < cu_info->num_shader_engines; i++) {
++ for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
++ pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
++ pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
++ pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
++ pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
++ k += 4;
++
++ cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4];
++ cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
++ }
++ }
++ pcache->sibling_map_size = k;
++ *props_ext = pcache;
++ return 0;
++ }
++ return 1;
++}
++
++#define KFD_MAX_CACHE_TYPES 6
++
++/* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info
++ * tables
++ */
++void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_dev *kdev)
++{
++ struct kfd_gpu_cache_info *pcache_info = NULL;
++ int i, j, k;
++ int ct = 0;
++ unsigned int cu_processor_id;
++ int ret;
++ unsigned int num_cu_shared;
++ struct kfd_cu_info cu_info;
++ struct kfd_cu_info *pcu_info;
++ int gpu_processor_id;
++ struct kfd_cache_properties *props_ext;
++ int num_of_entries = 0;
++ int num_of_cache_types = 0;
++ struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
++
++ amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
++ pcu_info = &cu_info;
++
++ gpu_processor_id = dev->node_props.simd_id_base;
++
++ pcache_info = cache_info;
++ num_of_cache_types = kfd_get_gpu_cache_info(kdev, &pcache_info);
++ if (!num_of_cache_types) {
++ pr_warn("no cache info found\n");
++ return;
++ }
++
++	/* For each type of cache listed in the kfd_gpu_cache_info table,
++	 * go through all available Compute Units.
++	 * In the [i,j,k] loop:
++	 *	if kfd_gpu_cache_info.num_cu_shared == 1,
++	 *		parse through all available CUs;
++	 *	if kfd_gpu_cache_info.num_cu_shared != 1,
++	 *		consider only one CU from
++	 *		each shared unit.
++	 */
++ for (ct = 0; ct < num_of_cache_types; ct++) {
++ cu_processor_id = gpu_processor_id;
++ if (pcache_info[ct].cache_level == 1) {
++ for (i = 0; i < pcu_info->num_shader_engines; i++) {
++ for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
++ for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
++
++ ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
++ pcu_info->cu_bitmap[i % 4][j + i / 4], ct,
++ cu_processor_id, k);
++
++ if (ret < 0)
++ break;
++
++ if (!ret) {
++ num_of_entries++;
++ list_add_tail(&props_ext->list, &dev->cache_props);
++ }
++
++ /* Move to next CU block */
++ num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
++ pcu_info->num_cu_per_sh) ?
++ pcache_info[ct].num_cu_shared :
++ (pcu_info->num_cu_per_sh - k);
++ cu_processor_id += num_cu_shared;
++ }
++ }
++ }
++ } else {
++ ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
++ pcu_info, ct, cu_processor_id);
++
++ if (ret < 0)
++ break;
++
++ if (!ret) {
++ num_of_entries++;
++ list_add_tail(&props_ext->list, &dev->cache_props);
++ }
++ }
++ }
++ dev->node_props.caches_count += num_of_entries;
++ pr_debug("Added [%d] GPU cache entries\n", num_of_entries);
++}
++
+ int kfd_topology_add_device(struct kfd_dev *gpu)
+ {
+ uint32_t gpu_id;
+@@ -1617,9 +1830,9 @@ int kfd_topology_add_device(struct kfd_d
+ * CRAT to create a new topology device. Once created assign the gpu to
+ * that topology device
+ */
++ down_write(&topology_lock);
+ dev = kfd_assign_gpu(gpu);
+ if (!dev) {
+- down_write(&topology_lock);
+ proximity_domain = ++topology_crat_proximity_domain;
+
+ res = kfd_create_crat_image_virtual(&crat_image, &image_size,
+@@ -1631,6 +1844,7 @@ int kfd_topology_add_device(struct kfd_d
+ topology_crat_proximity_domain--;
+ return res;
+ }
++
+ res = kfd_parse_crat_table(crat_image,
+ &temp_topology_device_list,
+ proximity_domain);
+@@ -1644,23 +1858,28 @@ int kfd_topology_add_device(struct kfd_d
+ kfd_topology_update_device_list(&temp_topology_device_list,
+ &topology_device_list);
+
++ dev = kfd_assign_gpu(gpu);
++ if (WARN_ON(!dev)) {
++ res = -ENODEV;
++ goto err;
++ }
++
++ /* Fill the cache affinity information here for the GPUs
++ * using VCRAT
++ */
++ kfd_fill_cache_non_crat_info(dev, gpu);
++
+ /* Update the SYSFS tree, since we added another topology
+ * device
+ */
+ res = kfd_topology_update_sysfs();
+- up_write(&topology_lock);
+-
+ if (!res)
+ sys_props.generation_count++;
+ else
+ pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
+ gpu_id, res);
+- dev = kfd_assign_gpu(gpu);
+- if (WARN_ON(!dev)) {
+- res = -ENODEV;
+- goto err;
+- }
+ }
++ up_write(&topology_lock);
+
+ dev->gpu_id = gpu_id;
+ gpu->id = gpu_id;
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+@@ -80,6 +80,8 @@ struct kfd_mem_properties {
+ struct attribute attr;
+ };
+
++#define CACHE_SIBLINGMAP_SIZE 64
++
+ struct kfd_cache_properties {
+ struct list_head list;
+ uint32_t processor_id_low;
+@@ -90,10 +92,11 @@ struct kfd_cache_properties {
+ uint32_t cache_assoc;
+ uint32_t cache_latency;
+ uint32_t cache_type;
+- uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE];
++ uint8_t sibling_map[CACHE_SIBLINGMAP_SIZE];
+ struct kfd_dev *gpu;
+ struct kobject *kobj;
+ struct attribute attr;
++ uint32_t sibling_map_size;
+ };
+
+ struct kfd_iolink_properties {