cancel_work_sync(&sched->work_process_msg);
}
+/**
+ * xe_sched_submission_stop_async - Stop further runs of submission tasks on a scheduler.
+ * @sched: the &xe_gpu_scheduler struct instance
+ *
+ * This call disables further runs of scheduling work queue. It does not wait
+ * for any in-progress runs to finish, only makes sure no further runs happen
+ * afterwards.
+ */
+void xe_sched_submission_stop_async(struct xe_gpu_scheduler *sched)
+{
+ drm_sched_wqueue_stop(&sched->base);
+}
+
void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched)
{
drm_sched_resume_timeout(&sched->base, sched->base.timeout);
}
+/**
+ * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
+ * @guc: the &xe_guc struct instance whose scheduler is to be disabled
+ */
+void xe_guc_submit_pause(struct xe_guc *guc)
+{
+ struct xe_exec_queue *q;
+ unsigned long index;
+
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+ xe_sched_submission_stop_async(&q->guc->sched);
+}
+
static void guc_exec_queue_start(struct xe_exec_queue *q)
{
struct xe_gpu_scheduler *sched = &q->guc->sched;
return 0;
}
+static void guc_exec_queue_unpause(struct xe_exec_queue *q)
+{
+ struct xe_gpu_scheduler *sched = &q->guc->sched;
+
+ xe_sched_submission_start(sched);
+}
+
+/**
+ * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
+ * @guc: the &xe_guc struct instance whose scheduler is to be enabled
+ */
+void xe_guc_submit_unpause(struct xe_guc *guc)
+{
+ struct xe_exec_queue *q;
+ unsigned long index;
+
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+ guc_exec_queue_unpause(q);
+
+ wake_up_all(&guc->ct.wq);
+}
+
static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
#include "xe_gt_sriov_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc_ct.h"
+#include "xe_guc_submit.h"
+#include "xe_irq.h"
#include "xe_pm.h"
#include "xe_sriov.h"
#include "xe_sriov_printk.h"
xe_sriov_info(xe, "migration not supported by this module version\n");
}
+/**
+ * vf_post_migration_shutdown - Stop the driver activities after VF migration.
+ * @xe: the &xe_device struct instance
+ *
+ * After this VM is migrated and assigned to a new VF, it is running on a new
+ * hardware, and therefore many hardware-dependent states and related structures
+ * require fixups. Without fixups, the hardware cannot do any work, and therefore
+ * all GPU pipelines are stalled.
+ * Stop some of kernel activities to make the fixup process faster.
+ */
+static void vf_post_migration_shutdown(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+
+ for_each_gt(gt, xe, id)
+ xe_guc_submit_pause(>->uc.guc);
+}
+
+/**
+ * vf_post_migration_kickstart - Re-start the driver activities under new hardware.
+ * @xe: the &xe_device struct instance
+ *
+ * After we have finished with all post-migration fixups, restart the driver
+ * activities to continue feeding the GPU with workloads.
+ */
+static void vf_post_migration_kickstart(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+
+ /*
+ * Make sure interrupts on the new HW are properly set. The GuC IRQ
+ * must be working at this point, since the recovery did started,
+ * but the rest was not enabled using the procedure from spec.
+ */
+ xe_irq_resume(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_guc_submit_unpause(>->uc.guc);
+}
+
static bool gt_vf_post_migration_needed(struct xe_gt *gt)
{
return test_bit(gt->info.id, >_to_xe(gt)->sriov.vf.migration.gt_flags);
drm_dbg(&xe->drm, "migration recovery in progress\n");
xe_pm_runtime_get(xe);
+ vf_post_migration_shutdown(xe);
if (!vf_migration_supported(xe)) {
xe_sriov_err(xe, "migration not supported by this module version\n");
set_bit(id, &fixed_gts);
}
+ vf_post_migration_kickstart(xe);
err = vf_post_migration_notify_resfix_done(xe, fixed_gts);
if (err)
goto fail;