git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/xe: Fix taking invalid lock on wedge
author Lucas De Marchi <lucas.demarchi@intel.com>
Thu, 3 Apr 2025 05:38:05 +0000 (22:38 -0700)
committer Lucas De Marchi <lucas.demarchi@intel.com>
Mon, 7 Apr 2025 21:22:35 +0000 (14:22 -0700)
If device wedges on e.g. GuC upload, the submission is not yet enabled
and the state is not even initialized. Protect the wedge call so it does
nothing in this case. It fixes the following splat:

[] xe 0000:bf:00.0: [drm] device wedged, needs recovery
[] ------------[ cut here ]------------
[] DEBUG_LOCKS_WARN_ON(lock->magic != lock)
[] WARNING: CPU: 48 PID: 312 at kernel/locking/mutex.c:564 __mutex_lock+0x8a1/0xe60
...
[] RIP: 0010:__mutex_lock+0x8a1/0xe60
[]  mutex_lock_nested+0x1b/0x30
[]  xe_guc_submit_wedge+0x80/0x2b0 [xe]

Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Link: https://lore.kernel.org/r/20250402-warn-after-wedge-v1-1-93e971511fa5@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
drivers/gpu/drm/xe/xe_guc_submit.c
drivers/gpu/drm/xe/xe_guc_types.h

index 31bc2022bfc2d80f0ef54726dfeb8d7f8e6b32c8..813c3c0bb2500722b03831d9815e83400460c9e2 100644 (file)
@@ -300,6 +300,8 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
 
        primelockdep(guc);
 
+       guc->submission_state.initialized = true;
+
        return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
 }
 
@@ -834,6 +836,13 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
 
        xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
 
+       /*
+        * If device is being wedged even before submission_state is
+        * initialized, there's nothing to do here.
+        */
+       if (!guc->submission_state.initialized)
+               return;
+
        err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
                                       guc_submit_wedged_fini, guc);
        if (err) {
index 63bac64429a5dc5e86c0f8b1877539869ee4d46c..1fde7614fcc522a472999dc945b70970cd0a6bde 100644 (file)
@@ -89,6 +89,11 @@ struct xe_guc {
                struct mutex lock;
                /** @submission_state.enabled: submission is enabled */
                bool enabled;
+               /**
+                * @submission_state.initialized: mark when submission state is
+                * even initialized - before that not even the lock is valid
+                */
+               bool initialized;
                /** @submission_state.fini_wq: submit fini wait queue */
                wait_queue_head_t fini_wq;
        } submission_state;