drm/amdgpu: Add logic for VF data exchange region to init from dynamic crit_region...
author    Ellen Pan <yunru.pan@amd.com>
          Wed, 8 Oct 2025 20:36:50 +0000 (15:36 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
          Mon, 20 Oct 2025 22:28:14 +0000 (18:28 -0400)
Add VF logic to initialize the data exchange region using the offsets from the dynamic (v2) critical region table.

Signed-off-by: Ellen Pan <yunru.pan@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
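
For context, here is a minimal, userspace-compilable sketch of the address math this patch introduces for the GPU_CRIT_REGION_V2 path. The struct layout, table indices, and 4 KB message size below are simplified stand-ins rather than the real amdgpu definitions; only the offset/size_kb fields and the pf2vf / vf2pf / ras_telemetry pointer derivation mirror the diff that follows.

/* Illustrative sketch only: stand-in types, not the amdgpu headers. */
#include <stdint.h>
#include <stdio.h>

/* Stand-in for one entry of adev->virt.crit_regn_tbl[] */
struct crit_region_entry {
	uint32_t offset;   /* byte offset of the region inside reserved VRAM */
	uint32_t size_kb;  /* region size in KB */
};

/* Stand-in table indices; the real IDs come from the SR-IOV message header */
enum {
	DATAEXCHANGE_TABLE_ID = 0,
	RAS_TELEMETRY_TABLE_ID = 1,
	CRIT_REGION_TABLE_MAX,
};

#define MSG_SIZE_KB 4u	/* placeholder for AMD_SRIOV_MSG_SIZE_KB */

/*
 * Mirrors the V2 branch added to amdgpu_virt_exchange_data(): pf2vf sits at
 * the data-exchange offset, vf2pf one message size further on, and RAS
 * telemetry at its own table entry's offset.
 */
static void locate_v2_regions(uint8_t *vram_base,
			      const struct crit_region_entry *tbl,
			      void **pf2vf, void **vf2pf, void **ras_telemetry)
{
	*pf2vf = vram_base + tbl[DATAEXCHANGE_TABLE_ID].offset;
	*vf2pf = vram_base + tbl[DATAEXCHANGE_TABLE_ID].offset + (MSG_SIZE_KB << 10);
	*ras_telemetry = vram_base + tbl[RAS_TELEMETRY_TABLE_ID].offset;
}

int main(void)
{
	/* Example offsets; on real hardware the host publishes these in the
	 * V2 critical-region table, they are not hard-coded. */
	struct crit_region_entry tbl[CRIT_REGION_TABLE_MAX] = {
		[DATAEXCHANGE_TABLE_ID]  = { .offset = 0x10000, .size_kb = 8  },
		[RAS_TELEMETRY_TABLE_ID] = { .offset = 0x20000, .size_kb = 64 },
	};
	static uint8_t fake_vram[0x40000];
	void *pf2vf, *vf2pf, *ras;

	locate_v2_regions(fake_vram, tbl, &pf2vf, &vf2pf, &ras);
	printf("pf2vf at +0x%tx, vf2pf at +0x%tx, ras_telemetry at +0x%tx\n",
	       (uint8_t *)pf2vf - fake_vram,
	       (uint8_t *)vf2pf - fake_vram,
	       (uint8_t *)ras - fake_vram);
	return 0;
}

The V1 path keeps using the fixed AMD_SRIOV_MSG_*_OFFSET_KB_V1 constants; the sketch above only covers the new table-driven lookup.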
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c

index bf0fd95919e6cfcbb11afcf7da861387b892567e..66e9cd1035974cc734e394b039b37f3fbdec4ac2 100644
@@ -218,12 +218,12 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
                                    &adev->virt.mm_table.gpu_addr,
                                    (void *)&adev->virt.mm_table.cpu_addr);
        if (r) {
-               DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
+               dev_err(adev->dev, "failed to alloc mm table and error = %d.\n", r);
                return r;
        }
 
        memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
-       DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
+       dev_info(adev->dev, "MM table gpu addr = 0x%llx, cpu addr = %p.\n",
                 adev->virt.mm_table.gpu_addr,
                 adev->virt.mm_table.cpu_addr);
        return 0;
@@ -403,7 +403,9 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
                        if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
                                                        AMDGPU_GPU_PAGE_SIZE,
                                                        &bo, NULL))
-                               DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
+                               dev_dbg(adev->dev,
+                                               "RAS WARN: reserve vram for retired page %llx fail\n",
+                                               bp);
                        data->bps_bo[i] = bo;
                }
                data->last_reserved = i + 1;
@@ -671,10 +673,34 @@ out:
        schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
 }
 
+static int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data)
+{
+       uint32_t dataexchange_offset =
+               adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset;
+       uint32_t dataexchange_size =
+               adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10;
+       uint64_t pos = 0;
+
+       dev_info(adev->dev,
+                       "Got data exchange info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+                       dataexchange_offset, dataexchange_size);
+
+       if (!IS_ALIGNED(dataexchange_offset, 4) || !IS_ALIGNED(dataexchange_size, 4)) {
+               dev_err(adev->dev, "Data exchange data not aligned to 4 bytes\n");
+               return -EINVAL;
+       }
+
+       pos = (uint64_t)dataexchange_offset;
+       amdgpu_device_vram_access(adev, pos, pfvf_data,
+                       dataexchange_size, false);
+
+       return 0;
+}
+
 void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
 {
        if (adev->virt.vf2pf_update_interval_ms != 0) {
-               DRM_INFO("clean up the vf2pf work item\n");
+               dev_info(adev->dev, "clean up the vf2pf work item\n");
                cancel_delayed_work_sync(&adev->virt.vf2pf_work);
                adev->virt.vf2pf_update_interval_ms = 0;
        }
@@ -682,13 +708,15 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
 
 void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
 {
+       uint32_t *pfvf_data = NULL;
+
        adev->virt.fw_reserve.p_pf2vf = NULL;
        adev->virt.fw_reserve.p_vf2pf = NULL;
        adev->virt.vf2pf_update_interval_ms = 0;
        adev->virt.vf2pf_update_retry_cnt = 0;
 
        if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
-               DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
+               dev_warn(adev->dev, "Currently fw_vram and drv_vram should not have values at the same time!");
        } else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
                /* go through this logic in ip_init and reset to init workqueue*/
                amdgpu_virt_exchange_data(adev);
@@ -697,11 +725,34 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
                schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
        } else if (adev->bios != NULL) {
                /* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
-               adev->virt.fw_reserve.p_pf2vf =
-                       (struct amd_sriov_msg_pf2vf_info_header *)
-                       (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+               if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+                       pfvf_data =
+                               kzalloc(adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10,
+                                       GFP_KERNEL);
+                       if (!pfvf_data) {
+                               dev_err(adev->dev, "Failed to allocate memory for pfvf_data\n");
+                               return;
+                       }
 
-               amdgpu_virt_read_pf2vf_data(adev);
+                       if (amdgpu_virt_read_exchange_data_from_mem(adev, pfvf_data))
+                               goto free_pfvf_data;
+
+                       adev->virt.fw_reserve.p_pf2vf =
+                               (struct amd_sriov_msg_pf2vf_info_header *)pfvf_data;
+
+                       amdgpu_virt_read_pf2vf_data(adev);
+
+free_pfvf_data:
+                       kfree(pfvf_data);
+                       pfvf_data = NULL;
+                       adev->virt.fw_reserve.p_pf2vf = NULL;
+               } else {
+                       adev->virt.fw_reserve.p_pf2vf =
+                               (struct amd_sriov_msg_pf2vf_info_header *)
+                               (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+
+                       amdgpu_virt_read_pf2vf_data(adev);
+               }
        }
 }
 
@@ -714,14 +765,29 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
 
        if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
                if (adev->mman.fw_vram_usage_va) {
-                       adev->virt.fw_reserve.p_pf2vf =
-                               (struct amd_sriov_msg_pf2vf_info_header *)
-                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
-                       adev->virt.fw_reserve.p_vf2pf =
-                               (struct amd_sriov_msg_vf2pf_info_header *)
-                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
-                       adev->virt.fw_reserve.ras_telemetry =
-                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
+                       if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+                               adev->virt.fw_reserve.p_pf2vf =
+                                       (struct amd_sriov_msg_pf2vf_info_header *)
+                                       (adev->mman.fw_vram_usage_va +
+                                       adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset);
+                               adev->virt.fw_reserve.p_vf2pf =
+                                       (struct amd_sriov_msg_vf2pf_info_header *)
+                                       (adev->mman.fw_vram_usage_va +
+                                       adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset +
+                                       (AMD_SRIOV_MSG_SIZE_KB << 10));
+                               adev->virt.fw_reserve.ras_telemetry =
+                                       (adev->mman.fw_vram_usage_va +
+                                       adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset);
+                       } else {
+                               adev->virt.fw_reserve.p_pf2vf =
+                                       (struct amd_sriov_msg_pf2vf_info_header *)
+                                       (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+                               adev->virt.fw_reserve.p_vf2pf =
+                                       (struct amd_sriov_msg_vf2pf_info_header *)
+                                       (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
+                               adev->virt.fw_reserve.ras_telemetry =
+                                       (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
+                       }
                } else if (adev->mman.drv_vram_usage_va) {
                        adev->virt.fw_reserve.p_pf2vf =
                                (struct amd_sriov_msg_pf2vf_info_header *)
@@ -829,7 +895,7 @@ static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
                        break;
                default: /* other chip doesn't support SRIOV */
                        is_sriov = false;
-                       DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
+                       dev_err(adev->dev, "Unknown asic type: %d!\n", adev->asic_type);
                        break;
                }
        }
@@ -1510,7 +1576,7 @@ amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block bloc
        case AMDGPU_RAS_BLOCK__MPIO:
                return RAS_TELEMETRY_GPU_BLOCK_MPIO;
        default:
-               DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n",
+               dev_warn(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n",
                              block);
                return RAS_TELEMETRY_GPU_BLOCK_COUNT;
        }