From: Niranjana Vishwanathapura Date: Thu, 11 Dec 2025 01:03:00 +0000 (-0800) Subject: drm/xe/multi_queue: Reset GT upon CGP_SYNC failure X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bb9343f122add786c57a6e8865209a9c6671bc9b;p=thirdparty%2Fkernel%2Flinux.git drm/xe/multi_queue: Reset GT upon CGP_SYNC failure If GuC doesn't respond to CGP_SYNC message, trigger GT reset and cleanup of all the queues of the multi queue group. Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251211010249.1647839-31-niranjana.vishwanathapura@intel.com --- diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 3be5e78485c7..e8bde976e4c8 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -593,6 +593,23 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) xe_sched_tdr_queue_imm(&q->guc->sched); } +static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q) +{ + struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_exec_queue *eq; + + xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), + xe_exec_queue_is_multi_queue(q)); + + xe_guc_exec_queue_trigger_cleanup(primary); + + mutex_lock(&group->list_lock); + list_for_each_entry(eq, &group->list, multi_queue.link) + xe_guc_exec_queue_trigger_cleanup(eq); + mutex_unlock(&group->list_lock); +} + static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q) { if (xe_exec_queue_is_multi_queue(q)) { @@ -618,6 +635,23 @@ static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q) } } +static void set_exec_queue_group_banned(struct xe_exec_queue *q) +{ + struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_exec_queue *eq; + + /* Ban all queues of the multi-queue 
group */ + xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), + xe_exec_queue_is_multi_queue(q)); + set_exec_queue_banned(primary); + + mutex_lock(&group->list_lock); + list_for_each_entry(eq, &group->list, multi_queue.link) + set_exec_queue_banned(eq); + mutex_unlock(&group->list_lock); +} + #define parallel_read(xe_, map_, field_) \ xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ field_) @@ -677,7 +711,11 @@ static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc, !READ_ONCE(group->sync_pending) || xe_guc_read_stopped(guc), HZ); if (!ret || xe_guc_read_stopped(guc)) { + /* CGP_SYNC failed. Reset gt, cleanup the group */ xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n"); + set_exec_queue_group_banned(q); + xe_gt_reset_async(q->gt); + xe_guc_exec_queue_group_trigger_cleanup(q); return; }