]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/xe/pm: Add lockdep annotation for the pm_block completion
author Thomas Hellström <thomas.hellstrom@linux.intel.com>
Thu, 18 Sep 2025 14:28:48 +0000 (16:28 +0200)
committer Thomas Hellström <thomas.hellstrom@linux.intel.com>
Tue, 23 Sep 2025 12:42:58 +0000 (14:42 +0200)
Similar to how we annotate dma-fences, add lockdep annotation to
the pm_block completion to ensure we don't wait for it while holding
locks that are needed in the pm notifier or in the device
suspend / resume callbacks.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://lore.kernel.org/r/20250918142848.21807-3-thomas.hellstrom@linux.intel.com
drivers/gpu/drm/xe/xe_exec.c
drivers/gpu/drm/xe/xe_pm.c
drivers/gpu/drm/xe/xe_pm.h
drivers/gpu/drm/xe/xe_vm.c

index 7715e74bb94544198d2d24b5be3123d66329e980..83897950f0da3e0d976c6e6f68a45242bf78ab7b 100644 (file)
@@ -16,6 +16,7 @@
 #include "xe_exec_queue.h"
 #include "xe_hw_engine_group.h"
 #include "xe_macros.h"
+#include "xe_pm.h"
 #include "xe_ring_ops_types.h"
 #include "xe_sched_job.h"
 #include "xe_sync.h"
@@ -247,7 +248,7 @@ retry:
         * on task freezing during suspend / hibernate, the call will
         * return -ERESTARTSYS and the IOCTL will be rerun.
         */
-       err = wait_for_completion_interruptible(&xe->pm_block);
+       err = xe_pm_block_on_suspend(xe);
        if (err)
                goto err_unlock_list;
 
index 1ec03ef19d9e8a8bb60ca7e6f68a8729623c4cb0..96afa49f0b4ba67119c1030b6700bb83844d15dc 100644 (file)
@@ -83,8 +83,58 @@ static struct lockdep_map xe_pm_runtime_d3cold_map = {
 static struct lockdep_map xe_pm_runtime_nod3cold_map = {
        .name = "xe_rpm_nod3cold_map"
 };
+
+static struct lockdep_map xe_pm_block_lockdep_map = {
+       .name = "xe_pm_block_map",
+};
 #endif
 
+static void xe_pm_block_begin_signalling(void)
+{
+       lock_acquire_shared_recursive(&xe_pm_block_lockdep_map, 0, 1, NULL, _RET_IP_);
+}
+
+static void xe_pm_block_end_signalling(void)
+{
+       lock_release(&xe_pm_block_lockdep_map, _RET_IP_);
+}
+
+/**
+ * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend
+ *
+ * Annotation to use where the code might block or cease to make
+ * progress pending resume completion.
+ */
+void xe_pm_might_block_on_suspend(void)
+{
+       lock_map_acquire(&xe_pm_block_lockdep_map);
+       lock_map_release(&xe_pm_block_lockdep_map);
+}
+
+/**
+ * xe_pm_block_on_suspend() - Block pending suspend.
+ * @xe: The xe device about to be suspended.
+ *
+ * Block if the pm notifier has started evicting bos, to avoid
+ * racing and validating those bos back. The function is
+ * annotated to ensure no locks are held that are also grabbed
+ * in the pm notifier or the device suspend / resume.
+ * This is intended to be used by freezable tasks only
+ * (not freezable workqueues), with the intention that the function
+ * returns %-ERESTARTSYS when tasks are frozen during suspend,
+ * and allows the task to freeze. The caller must be able to
+ * handle the %-ERESTARTSYS.
+ *
+ * Return: %0 on success, %-ERESTARTSYS on signal pending or
+ * if freezing requested.
+ */
+int xe_pm_block_on_suspend(struct xe_device *xe)
+{
+       xe_pm_might_block_on_suspend();
+
+       return wait_for_completion_interruptible(&xe->pm_block);
+}
+
 /**
  * xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context
  * @xe: The xe device.
@@ -124,6 +174,7 @@ int xe_pm_suspend(struct xe_device *xe)
        int err;
 
        drm_dbg(&xe->drm, "Suspending device\n");
+       xe_pm_block_begin_signalling();
        trace_xe_pm_suspend(xe, __builtin_return_address(0));
 
        err = xe_pxp_pm_suspend(xe->pxp);
@@ -155,6 +206,8 @@ int xe_pm_suspend(struct xe_device *xe)
        xe_i2c_pm_suspend(xe);
 
        drm_dbg(&xe->drm, "Device suspended\n");
+       xe_pm_block_end_signalling();
+
        return 0;
 
 err_display:
@@ -162,6 +215,7 @@ err_display:
        xe_pxp_pm_resume(xe->pxp);
 err:
        drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
+       xe_pm_block_end_signalling();
        return err;
 }
 
@@ -178,6 +232,7 @@ int xe_pm_resume(struct xe_device *xe)
        u8 id;
        int err;
 
+       xe_pm_block_begin_signalling();
        drm_dbg(&xe->drm, "Resuming device\n");
        trace_xe_pm_resume(xe, __builtin_return_address(0));
 
@@ -222,9 +277,11 @@ int xe_pm_resume(struct xe_device *xe)
        xe_late_bind_fw_load(&xe->late_bind);
 
        drm_dbg(&xe->drm, "Device resumed\n");
+       xe_pm_block_end_signalling();
        return 0;
 err:
        drm_dbg(&xe->drm, "Device resume failed %d\n", err);
+       xe_pm_block_end_signalling();
        return err;
 }
 
@@ -333,6 +390,7 @@ static int xe_pm_notifier_callback(struct notifier_block *nb,
                struct xe_validation_ctx ctx;
 
                reinit_completion(&xe->pm_block);
+               xe_pm_block_begin_signalling();
                xe_pm_runtime_get(xe);
                (void)xe_validation_ctx_init(&ctx, &xe->val, NULL,
                                             (struct xe_val_flags) {.exclusive = true});
@@ -349,6 +407,7 @@ static int xe_pm_notifier_callback(struct notifier_block *nb,
                 * avoid a runtime suspend interfering with evicted objects or backup
                 * allocations.
                 */
+               xe_pm_block_end_signalling();
                break;
        }
        case PM_POST_HIBERNATION:
index 59678b310e55f1a6b30f16ec09f5f23e1d30d0f5..f7f89a18b6fc737562bb491eaa5813f7d9c80ae8 100644 (file)
@@ -33,6 +33,8 @@ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold);
 void xe_pm_d3cold_allowed_toggle(struct xe_device *xe);
 bool xe_rpm_reclaim_safe(const struct xe_device *xe);
 struct task_struct *xe_pm_read_callback_task(struct xe_device *xe);
+int xe_pm_block_on_suspend(struct xe_device *xe);
+void xe_pm_might_block_on_suspend(void);
 int xe_pm_module_init(void);
 
 #endif
index 0cacab20ff8523a5f49f8d9b5d2d42bd2254a295..80b7f13ecd8044f180d96e2445bb83fb91551eec 100644 (file)
@@ -466,6 +466,8 @@ static void preempt_rebind_work_func(struct work_struct *w)
 retry:
        if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
                up_write(&vm->lock);
+               /* We don't actually block but don't make progress. */
+               xe_pm_might_block_on_suspend();
                return;
        }