]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/xe/guc: Explicitly exit CT safe mode on unwind
authorMichal Wajdeczko <michal.wajdeczko@intel.com>
Thu, 12 Jun 2025 22:09:37 +0000 (00:09 +0200)
committerThomas Hellström <thomas.hellstrom@linux.intel.com>
Thu, 26 Jun 2025 12:52:43 +0000 (14:52 +0200)
During driver probe we might be briefly using CT safe mode, which
is based on a delayed work, but usually we are able to stop this
once we have IRQ fully operational.  However, if we abort the probe
quite early then during unwind we might try to destroy the workqueue
while there is still a pending delayed work that attempts to restart
itself which triggers a WARN.

This was recently observed during unsuccessful VF initialization:

 [ ] xe 0000:00:02.1: probe with driver xe failed with error -62
 [ ] ------------[ cut here ]------------
 [ ] workqueue: cannot queue safe_mode_worker_func [xe] on wq xe-g2h-wq
 [ ] WARNING: CPU: 9 PID: 0 at kernel/workqueue.c:2257 __queue_work+0x287/0x710
 [ ] RIP: 0010:__queue_work+0x287/0x710
 [ ] Call Trace:
 [ ]  delayed_work_timer_fn+0x19/0x30
 [ ]  call_timer_fn+0xa1/0x2a0

Exit the CT safe mode on unwind to avoid that warning.

Fixes: 09b286950f29 ("drm/xe/guc: Allow CTB G2H processing without G2H IRQ")
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20250612220937.857-3-michal.wajdeczko@intel.com
(cherry picked from commit 2ddbb73ec20b98e70a5200cb85deade22ccea2ec)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
drivers/gpu/drm/xe/xe_guc_ct.c

index d0ac48d8f4f799fbbd510d792b84167f7dff914b..bbcbb348256f40732d4e9dcdfd6d1f54ffc31788 100644 (file)
 #include "xe_pm.h"
 #include "xe_trace_guc.h"
 
+static void receive_g2h(struct xe_guc_ct *ct);
+static void g2h_worker_func(struct work_struct *w);
+static void safe_mode_worker_func(struct work_struct *w);
+static void ct_exit_safe_mode(struct xe_guc_ct *ct);
+
 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
 enum {
        /* Internal states, not error conditions */
@@ -186,14 +191,11 @@ static void guc_ct_fini(struct drm_device *drm, void *arg)
 {
        struct xe_guc_ct *ct = arg;
 
+       ct_exit_safe_mode(ct);
        destroy_workqueue(ct->g2h_wq);
        xa_destroy(&ct->fence_lookup);
 }
 
-static void receive_g2h(struct xe_guc_ct *ct);
-static void g2h_worker_func(struct work_struct *w);
-static void safe_mode_worker_func(struct work_struct *w);
-
 static void primelockdep(struct xe_guc_ct *ct)
 {
        if (!IS_ENABLED(CONFIG_LOCKDEP))