git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/xe: Add exec queue param to devcoredump
author Matthew Brost <matthew.brost@intel.com>
Thu, 14 Nov 2024 02:25:19 +0000 (18:25 -0800)
committer Matthew Brost <matthew.brost@intel.com>
Thu, 14 Nov 2024 14:38:44 +0000 (06:38 -0800)
During capture time, the target job may be unavailable (e.g., if it's in
LR mode). However, the associated exec queue will be available
regardless, so add an exec queue param for such cases.

v2:
 - Reword commit message (Jonathan)

Cc: Zhanjun Dong <zhanjun.dong@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241114022522.1951351-5-matthew.brost@intel.com
drivers/gpu/drm/xe/xe_devcoredump.c
drivers/gpu/drm/xe/xe_devcoredump.h
drivers/gpu/drm/xe/xe_guc_submit.c

index d3570d3d573c2c9501d8a5f033f63f89231d8e2d..c32cbb46ef8cbb79b3cb993db21b97e47e9dc4a0 100644 (file)
@@ -238,10 +238,10 @@ static void xe_devcoredump_free(void *data)
 }
 
 static void devcoredump_snapshot(struct xe_devcoredump *coredump,
+                                struct xe_exec_queue *q,
                                 struct xe_sched_job *job)
 {
        struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
-       struct xe_exec_queue *q = job->q;
        struct xe_guc *guc = exec_queue_to_guc(q);
        u32 adj_logical_mask = q->logical_mask;
        u32 width_mask = (0x1 << q->width) - 1;
@@ -278,10 +278,12 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
        ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true);
        ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct);
        ss->ge = xe_guc_exec_queue_snapshot_capture(q);
-       ss->job = xe_sched_job_snapshot_capture(job);
+       if (job)
+               ss->job = xe_sched_job_snapshot_capture(job);
        ss->vm = xe_vm_snapshot_capture(q->vm);
 
-       xe_engine_snapshot_capture_for_job(job);
+       if (job)
+               xe_engine_snapshot_capture_for_job(job);
 
        queue_work(system_unbound_wq, &ss->work);
 
@@ -291,15 +293,16 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
 
 /**
  * xe_devcoredump - Take the required snapshots and initialize coredump device.
+ * @q: The faulty xe_exec_queue, where the issue was detected.
  * @job: The faulty xe_sched_job, where the issue was detected.
  *
  * This function should be called at the crash time within the serialized
  * gt_reset. It is skipped if we still have the core dump device available
  * with the information of the 'first' snapshot.
  */
-void xe_devcoredump(struct xe_sched_job *job)
+void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job)
 {
-       struct xe_device *xe = gt_to_xe(job->q->gt);
+       struct xe_device *xe = gt_to_xe(q->gt);
        struct xe_devcoredump *coredump = &xe->devcoredump;
 
        if (coredump->captured) {
@@ -308,7 +311,7 @@ void xe_devcoredump(struct xe_sched_job *job)
        }
 
        coredump->captured = true;
-       devcoredump_snapshot(coredump, job);
+       devcoredump_snapshot(coredump, q, job);
 
        drm_info(&xe->drm, "Xe device coredump has been created\n");
        drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
index a4eebc285fc837d59846cace51a8a85f95e311b5..c04a534e3384e46273df30ccd53111e39336bc8d 100644 (file)
 
 struct drm_printer;
 struct xe_device;
+struct xe_exec_queue;
 struct xe_sched_job;
 
 #ifdef CONFIG_DEV_COREDUMP
-void xe_devcoredump(struct xe_sched_job *job);
+void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job);
 int xe_devcoredump_init(struct xe_device *xe);
 #else
-static inline void xe_devcoredump(struct xe_sched_job *job)
+static inline void xe_devcoredump(struct xe_exec_queue *q,
+                                 struct xe_sched_job *job)
 {
 }
 
index 663ad4d97b34c3f69df7ae62e7eb69c1d1a6f63e..08a6578ee104c1a83b26629539250202a1a19478 100644 (file)
@@ -1154,7 +1154,7 @@ trigger_reset:
        trace_xe_sched_job_timedout(job);
 
        if (!exec_queue_killed(q))
-               xe_devcoredump(job);
+               xe_devcoredump(q, job);
 
        /*
         * Kernel jobs should never fail, nor should VM jobs if they do