]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/xe/multi_queue: Reset GT upon CGP_SYNC failure
author: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Thu, 11 Dec 2025 01:03:00 +0000 (17:03 -0800)
committer: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Fri, 12 Dec 2025 03:21:47 +0000 (19:21 -0800)
If the GuC doesn't respond to a CGP_SYNC message, trigger
a GT reset and clean up all the queues of the multi-queue
group.

Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251211010249.1647839-31-niranjana.vishwanathapura@intel.com
drivers/gpu/drm/xe/xe_guc_submit.c

index 3be5e78485c7a69c8fab444839617457d6e766d7..e8bde976e4c863c33984a8d39f0ced75e8e4b11a 100644 (file)
@@ -593,6 +593,23 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
                xe_sched_tdr_queue_imm(&q->guc->sched);
 }
 
+static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q)
+{
+       struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
+       struct xe_exec_queue_group *group = q->multi_queue.group;
+       struct xe_exec_queue *eq;
+
+       xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
+                    xe_exec_queue_is_multi_queue(q));
+       /* Trigger cleanup on the primary, then on each queue in the group list */
+       xe_guc_exec_queue_trigger_cleanup(primary);
+       /* list_lock is held across the whole list walk */
+       mutex_lock(&group->list_lock);
+       list_for_each_entry(eq, &group->list, multi_queue.link)
+               xe_guc_exec_queue_trigger_cleanup(eq);
+       mutex_unlock(&group->list_lock);
+}
+
 static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
 {
        if (xe_exec_queue_is_multi_queue(q)) {
@@ -618,6 +635,23 @@ static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
        }
 }
 
+static void set_exec_queue_group_banned(struct xe_exec_queue *q)
+{
+       struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
+       struct xe_exec_queue_group *group = q->multi_queue.group;
+       struct xe_exec_queue *eq;
+
+       /* Ban all queues of q's multi-queue group: the primary, then each listed queue */
+       xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
+                    xe_exec_queue_is_multi_queue(q));
+       set_exec_queue_banned(primary);
+       /* list_lock is held across the whole list walk */
+       mutex_lock(&group->list_lock);
+       list_for_each_entry(eq, &group->list, multi_queue.link)
+               set_exec_queue_banned(eq);
+       mutex_unlock(&group->list_lock);
+}
+
 #define parallel_read(xe_, map_, field_) \
        xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
                        field_)
@@ -677,7 +711,11 @@ static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
                                 !READ_ONCE(group->sync_pending) ||
                                 xe_guc_read_stopped(guc), HZ);
        if (!ret || xe_guc_read_stopped(guc)) {
+               /* CGP_SYNC failed. Reset gt, cleanup the group */
                xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n");
+               set_exec_queue_group_banned(q);
+               xe_gt_reset_async(q->gt);
+               xe_guc_exec_queue_group_trigger_cleanup(q);
                return;
        }