git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/xe/vf: Requeue recovery on GuC MIGRATION error during VF post-migration
author: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Mon, 1 Dec 2025 09:50:15 +0000 (15:20 +0530)
committer: Michal Wajdeczko <michal.wajdeczko@intel.com>
Tue, 2 Dec 2025 15:17:25 +0000 (16:17 +0100)
Handle GuC response `XE_GUC_RESPONSE_VF_MIGRATED` as a special case in the
VF post-migration recovery flow. When this error occurs, it indicates that
a new migration was detected while the resource fixup process was still in
progress. Instead of failing immediately, requeue the VF into the recovery
path to allow proper handling of the new migration event.

This improves robustness of VF recovery in SR-IOV environments where
migrations can overlap with resource fixup steps.

Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Tomasz Lis <tomasz.lis@intel.com>
Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://patch.msgid.link/20251201095011.21453-9-satyanarayana.k.v.p@intel.com
drivers/gpu/drm/xe/xe_gt_sriov_vf.c
drivers/gpu/drm/xe/xe_guc.c

index 5bf13e41de80136e0d7a142729efb23f5becf2e4..0b3ecb000ff7123892387a4566a26fe66b8f6b07 100644 (file)
@@ -1268,6 +1268,9 @@ static void vf_post_migration_recovery(struct xe_gt *gt)
 
        err = vf_post_migration_resfix_done(gt, marker);
        if (err) {
+               if (err == -EREMCHG)
+                       goto queue;
+
                xe_gt_sriov_err(gt, "Recovery failed at GuC RESFIX_DONE step (%pe)\n",
                                ERR_PTR(err));
                goto fail;
index 88376bc2a483322a7d30062095a21ed59fd08475..f0407bab9a0c3126f44f1d9f9be6a88fa6bf247c 100644 (file)
@@ -1484,6 +1484,12 @@ timeout:
                u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header);
                u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header);
 
+               if (unlikely(error == XE_GUC_RESPONSE_VF_MIGRATED)) {
+                       xe_gt_dbg(gt, "GuC mmio request %#x rejected due to MIGRATION (hint %#x)\n",
+                                 request[0], hint);
+                       return -EREMCHG;
+               }
+
                xe_gt_err(gt, "GuC mmio request %#x: failure %#x hint %#x\n",
                          request[0], error, hint);
                return -ENXIO;