drm/xe/vf: Ensure media GT VF recovery runs after primary GT on PTL
author    Matthew Brost <matthew.brost@intel.com>
          Wed, 8 Oct 2025 21:45:30 +0000 (14:45 -0700)
committer Matthew Brost <matthew.brost@intel.com>
          Thu, 9 Oct 2025 10:24:28 +0000 (03:24 -0700)
It is possible for the media GT's VF post-migration recovery work item
to be scheduled before the primary GT's work item. Since the media GT
depends on the primary GT completing the CCS restore, detect when the
primary GT's recovery is still pending and re-queue the media GT's work
item for a later time.
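
The resulting control flow, condensed from the hunks below (a simplified
sketch, not the literal patch; power-management handling, error paths and
the unchanged shutdown steps are elided):

    /* Sketch: media-GT recovery defers to the primary GT on affected platforms. */
    static bool vf_post_migration_shutdown(struct xe_gt *gt)
    {
            struct xe_device *xe = gt_to_xe(gt);

            /* Media GT must not run before the primary GT has restored CCS. */
            if (xe->info.needs_shared_vf_gt_wq && xe_gt_is_media_type(gt) &&
                xe_gt_sriov_vf_recovery_pending(gt_to_tile(gt)->primary_gt))
                    return true;    /* ask the caller to re-queue */

            /* ... existing shutdown steps ... */
            return false;
    }

    static void vf_post_migration_recovery(struct xe_gt *gt)
    {
            if (vf_post_migration_shutdown(gt)) {
                    /* Primary GT recovery still pending: retry later. */
                    queue_work(gt->ordered_wq, &gt->sriov.vf.migration.worker);
                    return;
            }
            /* ... normal recovery continues ... */
    }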

v5:
 - Adjust debug message (Tomasz)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Tomasz Lis <tomasz.lis@intel.com>
Link: https://lore.kernel.org/r/20251008214532.3442967-33-matthew.brost@intel.com
drivers/gpu/drm/xe/xe_gt_sriov_vf.c

index edd98a4da3066294d84f054bde58206a38ff6ce6..b527f78da295087dd0a0cf935732c9f8024fb02d 100644
@@ -1107,8 +1107,22 @@ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p)
                   pf_version->major, pf_version->minor);
 }
 
-static void vf_post_migration_shutdown(struct xe_gt *gt)
+static bool vf_post_migration_shutdown(struct xe_gt *gt)
 {
+       struct xe_device *xe = gt_to_xe(gt);
+
+       /*
+        * On platforms where CCS must be restored by the primary GT, the media
+        * GT's VF post-migration recovery must run afterward. Detect this case
+        * and re-queue the media GT's restore work item if necessary.
+        */
+       if (xe->info.needs_shared_vf_gt_wq && xe_gt_is_media_type(gt)) {
+               struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt;
+
+               if (xe_gt_sriov_vf_recovery_pending(primary_gt))
+                       return true;
+       }
+
        spin_lock_irq(&gt->sriov.vf.migration.lock);
        gt->sriov.vf.migration.recovery_queued = false;
        spin_unlock_irq(&gt->sriov.vf.migration.lock);
@@ -1116,6 +1130,8 @@ static void vf_post_migration_shutdown(struct xe_gt *gt)
        xe_guc_ct_flush_and_stop(&gt->uc.guc.ct);
        xe_guc_submit_pause(&gt->uc.guc);
        xe_tlb_inval_reset(&gt->tlb_inval);
+
+       return false;
 }
 
 static size_t post_migration_scratch_size(struct xe_device *xe)
@@ -1194,11 +1210,14 @@ static void vf_post_migration_recovery(struct xe_gt *gt)
 {
        struct xe_device *xe = gt_to_xe(gt);
        int err;
+       bool retry;
 
        xe_gt_sriov_dbg(gt, "migration recovery in progress\n");
 
        xe_pm_runtime_get(xe);
-       vf_post_migration_shutdown(gt);
+       retry = vf_post_migration_shutdown(gt);
+       if (retry)
+               goto queue;
 
        if (!xe_sriov_vf_migration_supported(xe)) {
                xe_gt_sriov_err(gt, "migration is not supported\n");
@@ -1226,6 +1245,12 @@ fail:
        xe_pm_runtime_put(xe);
        xe_gt_sriov_err(gt, "migration recovery failed (%pe)\n", ERR_PTR(err));
        xe_device_declare_wedged(xe);
+       return;
+
+queue:
+       xe_gt_sriov_info(gt, "Re-queuing migration recovery\n");
+       queue_work(gt->ordered_wq, &gt->sriov.vf.migration.worker);
+       xe_pm_runtime_put(xe);
 }
 
 static void migration_worker_func(struct work_struct *w)