git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/xe/guc: Cancel ongoing H2G requests when stopping CT
author Michal Wajdeczko <michal.wajdeczko@intel.com>
Wed, 9 Jul 2025 17:40:38 +0000 (19:40 +0200)
committer Michal Wajdeczko <michal.wajdeczko@intel.com>
Thu, 10 Jul 2025 19:46:29 +0000 (21:46 +0200)
Once we have started a GT reset sequence, which includes stopping
GuC CTB communication, we should also cancel all ongoing H2G
send-recv requests, as either GuC is already dead, or due to the
imminent reset GuC will not be able to reply, or due to internal
cleanup we will lose pending fences. With this we will report a
dedicated -ECANCELED error instead of a misleading -ETIME.

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Acked-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20250709174038.1876-4-michal.wajdeczko@intel.com
drivers/gpu/drm/xe/xe_guc_ct.c

index 17e5870baf339628851c076ec082220cbf14b38a..b6acccfcd3514c2de7edacc76f6b9c847a3eaea7 100644 (file)
@@ -85,6 +85,7 @@ struct g2h_fence {
        u16 error;
        u16 hint;
        u16 reason;
+       bool cancel;
        bool retry;
        bool fail;
        bool done;
@@ -103,6 +104,13 @@ static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer)
        g2h_fence->seqno = ~0x0;
 }
 
+static void g2h_fence_cancel(struct g2h_fence *g2h_fence)
+{
+       g2h_fence->cancel = true;
+       g2h_fence->fail = true;
+       g2h_fence->done = true;
+}
+
 static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence)
 {
        return g2h_fence->seqno == ~0x0;
@@ -388,6 +396,8 @@ static void guc_ct_change_state(struct xe_guc_ct *ct,
                                enum xe_guc_ct_state state)
 {
        struct xe_gt *gt = ct_to_gt(ct);
+       struct g2h_fence *g2h_fence;
+       unsigned long idx;
 
        mutex_lock(&ct->lock);          /* Serialise dequeue_one_g2h() */
        spin_lock_irq(&ct->fast_lock);  /* Serialise CT fast-path */
@@ -406,6 +416,14 @@ static void guc_ct_change_state(struct xe_guc_ct *ct,
 
        spin_unlock_irq(&ct->fast_lock);
 
+       /* cancel all in-flight send-recv requests */
+       xa_for_each(&ct->fence_lookup, idx, g2h_fence)
+               g2h_fence_cancel(g2h_fence);
+
+       /* make sure guc_ct_send_recv() will see g2h_fence changes */
+       smp_mb();
+       wake_up_all(&ct->g2h_fence_wq);
+
        /*
         * Lockdep doesn't like this under the fast lock and he destroy only
         * needs to be serialized with the send path which ct lock provides.
@@ -1098,6 +1116,11 @@ retry_same_fence:
                goto retry;
        }
        if (g2h_fence.fail) {
+               if (g2h_fence.cancel) {
+                       xe_gt_dbg(gt, "H2G request %#x canceled!\n", action[0]);
+                       ret = -ECANCELED;
+                       goto unlock;
+               }
                xe_gt_err(gt, "H2G request %#x failed: error %#x hint %#x\n",
                          action[0], g2h_fence.error, g2h_fence.hint);
                ret = -EIO;
@@ -1106,6 +1129,7 @@ retry_same_fence:
        if (ret > 0)
                ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data;
 
+unlock:
        mutex_unlock(&ct->lock);
 
        return ret;