drm/xe/guc: Track pending-enable source in submission state
author     Matthew Brost <matthew.brost@intel.com>
           Wed, 8 Oct 2025 21:45:01 +0000 (14:45 -0700)
committer  Matthew Brost <matthew.brost@intel.com>
           Thu, 9 Oct 2025 10:22:18 +0000 (03:22 -0700)
Add explicit tracking in the GuC submission state to record the source
of a pending enable (TDR vs. queue resume path vs. submission).
Disambiguating the origin lets the GuC submission state machine apply
the correct recovery/replay behavior.

This helps VF restore: when the device comes back, the state machine knows
whether the pending enable stems from timeout recovery, from a queue resume
sequence, or from normal submission, and it can gate sequencing and fixups
accordingly.
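The consumer of these flags is not part of this patch. Purely as an
illustration, a later VF-restore fixup could branch on the recorded source
roughly as in the sketch below; only the exec_queue_pending_*() accessors
come from this diff, while the function name and the replay_* helpers are
hypothetical placeholders:

/*
 * Hypothetical sketch, not part of this patch: how a post-migration fixup
 * might consume the new flags. The replay_* helpers are placeholders.
 */
static void vf_fixup_pending_enable_sketch(struct xe_exec_queue *q)
{
	if (!exec_queue_pending_enable(q))
		return;				/* no enable in flight for this queue */

	if (exec_queue_pending_tdr_exit(q)) {
		/* Enable was issued on the TDR exit path (no reset taken):
		 * re-emit the scheduling enable after restore.
		 */
		replay_enable_from_tdr(q);	/* placeholder */
	} else if (exec_queue_pending_resume(q)) {
		/* Enable came from the queue resume message path:
		 * redo the resume sequencing (resume_time, wait/wake).
		 */
		replay_enable_from_resume(q);	/* placeholder */
	} else {
		/* Plain submission path: just re-send the enable to the GuC. */
		replay_enable_from_submit(q);	/* placeholder */
	}
}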

v4:
 - Clarify commit message (Tomasz)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Tomasz Lis <tomasz.lis@intel.com>
Link: https://lore.kernel.org/r/20251008214532.3442967-4-matthew.brost@intel.com
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 16f78376f196686f6016bb07075315ee19053242..13746f32b231961ea377f127dcacbf9cc7fdca52 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -69,6 +69,8 @@ exec_queue_to_guc(struct xe_exec_queue *q)
 #define EXEC_QUEUE_STATE_BANNED                        (1 << 9)
 #define EXEC_QUEUE_STATE_CHECK_TIMEOUT         (1 << 10)
 #define EXEC_QUEUE_STATE_EXTRA_REF             (1 << 11)
+#define EXEC_QUEUE_STATE_PENDING_RESUME                (1 << 12)
+#define EXEC_QUEUE_STATE_PENDING_TDR_EXIT      (1 << 13)
 
 static bool exec_queue_registered(struct xe_exec_queue *q)
 {
@@ -220,6 +222,36 @@ static void set_exec_queue_extra_ref(struct xe_exec_queue *q)
        atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state);
 }
 
+static bool __maybe_unused exec_queue_pending_resume(struct xe_exec_queue *q)
+{
+       return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME;
+}
+
+static void set_exec_queue_pending_resume(struct xe_exec_queue *q)
+{
+       atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
+}
+
+static void clear_exec_queue_pending_resume(struct xe_exec_queue *q)
+{
+       atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
+}
+
+static bool __maybe_unused exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
+{
+       return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_TDR_EXIT;
+}
+
+static void set_exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
+{
+       atomic_or(EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state);
+}
+
+static void clear_exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
+{
+       atomic_and(~EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state);
+}
+
 static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
 {
        return (atomic_read(&q->guc->state) &
@@ -1334,6 +1366,7 @@ trigger_reset:
        return DRM_GPU_SCHED_STAT_RESET;
 
 sched_enable:
+       set_exec_queue_pending_tdr_exit(q);
        enable_scheduling(q);
 rearm:
        /*
@@ -1493,6 +1526,7 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
                clear_exec_queue_suspended(q);
                if (!exec_queue_enabled(q)) {
                        q->guc->resume_time = RESUME_PENDING;
+                       set_exec_queue_pending_resume(q);
                        enable_scheduling(q);
                }
        } else {
@@ -2065,6 +2099,8 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
                xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));
 
                q->guc->resume_time = ktime_get();
+               clear_exec_queue_pending_resume(q);
+               clear_exec_queue_pending_tdr_exit(q);
                clear_exec_queue_pending_enable(q);
                smp_wmb();
                wake_up_all(&guc->ct.wq);