git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/xe/vf: Fail migration recovery if fixups needed but platform not supported
author Tomasz Lis <tomasz.lis@intel.com>
Mon, 19 May 2025 23:00:35 +0000 (01:00 +0200)
committer Michal Wajdeczko <michal.wajdeczko@intel.com>
Thu, 22 May 2025 10:04:09 +0000 (12:04 +0200)
The post-migration recovery needs to be fully implemented for a
specific platform in order to make continuation of workloads
possible.

New platforms introduce changes which affect the recovery procedure,
and without a clear verification of support this leads to errors
with no straightforward error message explaining the cause.

This patch fixes that issue - it introduces a message to be logged
when the current driver is known to not support the current platform.

Wedging the driver immediately also reduces the number of
additional errors which would come afterwards if the driver continued
operation.

v2: Show the message during probe as well as during recovery; do not
  perform any recovery steps if the recovery is bound to fail
v3: Use SRIOV-specific logging, fix typos
v4: Switch the check from XE_DEBUG_SRIOV to XE_DEBUG, to make testing
  more straightforward

Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Acked-by: Michał Winiarski <michal.winiarski@intel.com>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://lore.kernel.org/r/20250519230035.3143966-1-tomasz.lis@intel.com
drivers/gpu/drm/xe/xe_sriov_vf.c

index 2674fa948fda3467754e14d16c85eb35bb8c8782..46466932375c2748fc3c686809ebd3f2272c5587 100644 (file)
  *      |                               |                               |
  */
 
+static bool vf_migration_supported(struct xe_device *xe)
+{
+       /*
+        * TODO: Add conditions to allow specific platforms, when they're
+        * supported at production quality.
+        */
+       return IS_ENABLED(CONFIG_DRM_XE_DEBUG);
+}
+
 static void migration_worker_func(struct work_struct *w);
 
 /**
@@ -132,6 +141,9 @@ static void migration_worker_func(struct work_struct *w);
 void xe_sriov_vf_init_early(struct xe_device *xe)
 {
        INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
+
+       if (!vf_migration_supported(xe))
+               xe_sriov_info(xe, "migration not supported by this module version\n");
 }
 
 /**
@@ -236,6 +248,11 @@ static void vf_post_migration_recovery(struct xe_device *xe)
                goto defer;
        if (unlikely(err))
                goto fail;
+       if (!vf_migration_supported(xe)) {
+               xe_sriov_err(xe, "migration not supported by this module version\n");
+               err = -ENOTRECOVERABLE;
+               goto fail;
+       }
 
        need_fixups = vf_post_migration_fixup_ggtt_nodes(xe);
        /* FIXME: add the recovery steps */