drm/xe: Add mutex locking to devcoredump

author John Harrison <John.C.Harrison@Intel.com>

Thu, 28 Nov 2024 21:08:24 +0000 (13:08 -0800)

committer John Harrison <John.C.Harrison@Intel.com>

Mon, 2 Dec 2024 23:50:44 +0000 (15:50 -0800)
author John Harrison <John.C.Harrison@Intel.com>
Thu, 28 Nov 2024 21:08:24 +0000 (13:08 -0800)
committer John Harrison <John.C.Harrison@Intel.com>
Mon, 2 Dec 2024 23:50:44 +0000 (15:50 -0800)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c

index d24f1088e298a436c6cb0657253fcc19406a3391..71636e80b71dae88c96ee7184b79f93aa0b7f2ec 100644 (file)
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -183,16 +183,24 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
         /* Ensure delayed work is captured before continuing */
         flush_work(&ss->work);
  
-       if (!ss->read.buffer)
+       mutex_lock(&coredump->lock);
+
+       if (!ss->read.buffer) {
+               mutex_unlock(&coredump->lock);
                 return -ENODEV;
+       }
  
-       if (offset >= ss->read.size)
+       if (offset >= ss->read.size) {
+               mutex_unlock(&coredump->lock);
                 return 0;
+       }
  
         byte_copied = count < ss->read.size - offset ? count :
                 ss->read.size - offset;
         memcpy(buffer, ss->read.buffer + offset, byte_copied);
  
+       mutex_unlock(&coredump->lock);
+
         return byte_copied;
  }
  
@@ -206,6 +214,8 @@ static void xe_devcoredump_free(void *data)
  
         cancel_work_sync(&coredump->snapshot.work);
  
+       mutex_lock(&coredump->lock);
+
         xe_devcoredump_snapshot_free(&coredump->snapshot);
         kvfree(coredump->snapshot.read.buffer);
  
@@ -214,6 +224,8 @@ static void xe_devcoredump_free(void *data)
         coredump->captured = false;
         drm_info(&coredump_to_xe(coredump)->drm,
                  "Xe device coredump has been deleted.\n");
+
+       mutex_unlock(&coredump->lock);
  }
  
  static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
@@ -327,8 +339,11 @@ void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job, const cha
         struct xe_devcoredump *coredump = &xe->devcoredump;
         va_list varg;
  
+       mutex_lock(&coredump->lock);
+
         if (coredump->captured) {
                 drm_dbg(&xe->drm, "Multiple hangs are occurring, but only the first snapshot was taken\n");
+               mutex_unlock(&coredump->lock);
                 return;
         }
  
@@ -343,6 +358,8 @@ void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job, const cha
         drm_info(&xe->drm, "Xe device coredump has been created\n");
         drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
                  xe->drm.primary->index);
+
+       mutex_unlock(&coredump->lock);
  }
  
  static void xe_driver_devcoredump_fini(void *arg)
@@ -354,6 +371,18 @@ static void xe_driver_devcoredump_fini(void *arg)
  
  int xe_devcoredump_init(struct xe_device *xe)
  {
+       int err;
+
+       err = drmm_mutex_init(&xe->drm, &xe->devcoredump.lock);
+       if (err)
+               return err;
+
+       if (IS_ENABLED(CONFIG_LOCKDEP)) {
+               fs_reclaim_acquire(GFP_KERNEL);
+               might_lock(&xe->devcoredump.lock);
+               fs_reclaim_release(GFP_KERNEL);
+       }
+
         return devm_add_action_or_reset(xe->drm.dev, xe_driver_devcoredump_fini, &xe->drm);
  }
  
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h

index e6234e887102b918979e66fba34add39d3237949..1a1d16a96b2d3d8365f482205453c7a7073642d4 100644 (file)
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -80,7 +80,9 @@ struct xe_devcoredump_snapshot {
   * for reading the information.
   */
  struct xe_devcoredump {
-       /** @captured: The snapshot of the first hang has already been taken. */
+       /** @lock: protects access to entire structure */
+       struct mutex lock;
+       /** @captured: The snapshot of the first hang has already been taken */
         bool captured;
         /** @snapshot: Snapshot is captured at time of the first crash */
         struct xe_devcoredump_snapshot snapshot;
author	John Harrison <John.C.Harrison@Intel.com>
	Thu, 28 Nov 2024 21:08:24 +0000 (13:08 -0800)
committer	John Harrison <John.C.Harrison@Intel.com>
	Mon, 2 Dec 2024 23:50:44 +0000 (15:50 -0800)
drivers/gpu/drm/xe/xe_devcoredump.c		patch \| blob \| blame \| history
drivers/gpu/drm/xe/xe_devcoredump_types.h		patch \| blob \| blame \| history