drm/xe: Fix error handling if PXP fails to start
author Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Tue, 9 Sep 2025 22:12:40 +0000 (15:12 -0700)
committer Rodrigo Vivi <rodrigo.vivi@intel.com>
Wed, 17 Sep 2025 16:28:55 +0000 (12:28 -0400)
Since the PXP start comes after __xe_exec_queue_init() has completed,
we need to clean up what was done in that function in case of a PXP
start error.

__xe_exec_queue_init() calls the submission backend's init() function,
so we need to introduce an opposite for that. Unfortunately, while
we already have a fini() function pointer, it performs other
operations in addition to cleaning up what was done by init().
Therefore, for clarity, the existing fini() has been renamed to
destroy(), while a new fini() has been added that cleans up only what
was done by init(), with the latter being called by the former (via
xe_exec_queue_fini()).

Fixes: 72d479601d67 ("drm/xe/pxp/uapi: Add userspace and LRC support for PXP-using queues")
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://lore.kernel.org/r/20250909221240.3711023-3-daniele.ceraolospurio@intel.com
(cherry picked from commit 626667321deb4c7a294725406faa3dd71c3d445d)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
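
For orientation, here is a condensed sketch of the resulting split, assembled
from the diff below (illustrative only, not a standalone build unit): the new
__xe_exec_queue_fini() helper undoes only what __xe_exec_queue_init() did, the
PXP error path in xe_exec_queue_create() now unwinds through it before freeing
the queue, and the previous full teardown lives on behind the renamed
destroy() op.

	/* Condensed from the diff below; not meant to compile on its own. */
	static void __xe_exec_queue_fini(struct xe_exec_queue *q)
	{
		int i;

		/* Undo only what __xe_exec_queue_init() set up: backend init + LRCs. */
		q->ops->fini(q);

		for (i = 0; i < q->width; ++i)
			xe_lrc_put(q->lrc[i]);
	}

	/* In xe_exec_queue_create(), a PXP start failure now unwinds the init: */
		if (xe_exec_queue_uses_pxp(q)) {
			err = xe_pxp_exec_queue_add(xe->pxp, q);
			if (err)
				goto err_post_init;
		}

		return q;

	err_post_init:
		__xe_exec_queue_fini(q);	/* new: undo __xe_exec_queue_init() */
	err_post_alloc:
		__xe_exec_queue_free(q);	/* as before: free the allocation */
		return ERR_PTR(err);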
drivers/gpu/drm/xe/xe_exec_queue.c
drivers/gpu/drm/xe/xe_exec_queue_types.h
drivers/gpu/drm/xe/xe_execlist.c
drivers/gpu/drm/xe/xe_execlist_types.h
drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
drivers/gpu/drm/xe/xe_guc_submit.c

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 8991b4aed440710590e0de4feee7784049c31388..c07edcda99c5caa38aa53b7a06cd15398015d422 100644
@@ -151,6 +151,16 @@ err_lrc:
        return err;
 }
 
+static void __xe_exec_queue_fini(struct xe_exec_queue *q)
+{
+       int i;
+
+       q->ops->fini(q);
+
+       for (i = 0; i < q->width; ++i)
+               xe_lrc_put(q->lrc[i]);
+}
+
 struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
                                           u32 logical_mask, u16 width,
                                           struct xe_hw_engine *hwe, u32 flags,
@@ -181,11 +191,13 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
        if (xe_exec_queue_uses_pxp(q)) {
                err = xe_pxp_exec_queue_add(xe->pxp, q);
                if (err)
-                       goto err_post_alloc;
+                       goto err_post_init;
        }
 
        return q;
 
+err_post_init:
+       __xe_exec_queue_fini(q);
 err_post_alloc:
        __xe_exec_queue_free(q);
        return ERR_PTR(err);
@@ -283,13 +295,11 @@ void xe_exec_queue_destroy(struct kref *ref)
                        xe_exec_queue_put(eq);
        }
 
-       q->ops->fini(q);
+       q->ops->destroy(q);
 }
 
 void xe_exec_queue_fini(struct xe_exec_queue *q)
 {
-       int i;
-
        /*
         * Before releasing our ref to lrc and xef, accumulate our run ticks
         * and wakeup any waiters.
@@ -298,9 +308,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
        if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
                wake_up_var(&q->xef->exec_queue.pending_removal);
 
-       for (i = 0; i < q->width; ++i)
-               xe_lrc_put(q->lrc[i]);
-
+       __xe_exec_queue_fini(q);
        __xe_exec_queue_free(q);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index cc1cffb5c87f1d0a4d2cea9e7b57cee9835762af..1c9d03f2a3e5da34ea790aeb9bcb8170681e46bb 100644
@@ -166,8 +166,14 @@ struct xe_exec_queue_ops {
        int (*init)(struct xe_exec_queue *q);
        /** @kill: Kill inflight submissions for backend */
        void (*kill)(struct xe_exec_queue *q);
-       /** @fini: Fini exec queue for submission backend */
+       /** @fini: Undoes the init() for submission backend */
        void (*fini)(struct xe_exec_queue *q);
+       /**
+        * @destroy: Destroy exec queue for submission backend. The backend
+        * function must call xe_exec_queue_fini() (which will in turn call the
+        * fini() backend function) to ensure the queue is properly cleaned up.
+        */
+       void (*destroy)(struct xe_exec_queue *q);
        /** @set_priority: Set priority for exec queue */
        int (*set_priority)(struct xe_exec_queue *q,
                            enum xe_exec_queue_priority priority);
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 788f56b066b6ad1a823e8378df41bb0fcaf37c0d..f83d421ac9d3d2a098fdeb170e96f36eb09baec7 100644
@@ -385,10 +385,20 @@ err_free:
        return err;
 }
 
-static void execlist_exec_queue_fini_async(struct work_struct *w)
+static void execlist_exec_queue_fini(struct xe_exec_queue *q)
+{
+       struct xe_execlist_exec_queue *exl = q->execlist;
+
+       drm_sched_entity_fini(&exl->entity);
+       drm_sched_fini(&exl->sched);
+
+       kfree(exl);
+}
+
+static void execlist_exec_queue_destroy_async(struct work_struct *w)
 {
        struct xe_execlist_exec_queue *ee =
-               container_of(w, struct xe_execlist_exec_queue, fini_async);
+               container_of(w, struct xe_execlist_exec_queue, destroy_async);
        struct xe_exec_queue *q = ee->q;
        struct xe_execlist_exec_queue *exl = q->execlist;
        struct xe_device *xe = gt_to_xe(q->gt);
@@ -401,10 +411,6 @@ static void execlist_exec_queue_fini_async(struct work_struct *w)
                list_del(&exl->active_link);
        spin_unlock_irqrestore(&exl->port->lock, flags);
 
-       drm_sched_entity_fini(&exl->entity);
-       drm_sched_fini(&exl->sched);
-       kfree(exl);
-
        xe_exec_queue_fini(q);
 }
 
@@ -413,10 +419,10 @@ static void execlist_exec_queue_kill(struct xe_exec_queue *q)
        /* NIY */
 }
 
-static void execlist_exec_queue_fini(struct xe_exec_queue *q)
+static void execlist_exec_queue_destroy(struct xe_exec_queue *q)
 {
-       INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
-       queue_work(system_unbound_wq, &q->execlist->fini_async);
+       INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async);
+       queue_work(system_unbound_wq, &q->execlist->destroy_async);
 }
 
 static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
@@ -467,6 +473,7 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
        .init = execlist_exec_queue_init,
        .kill = execlist_exec_queue_kill,
        .fini = execlist_exec_queue_fini,
+       .destroy = execlist_exec_queue_destroy,
        .set_priority = execlist_exec_queue_set_priority,
        .set_timeslice = execlist_exec_queue_set_timeslice,
        .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h
index 415140936f11da49ed988606c81fba0a3e471de9..92c4ba52db0cb1b617c1fd38cc9df02f355165d8 100644
@@ -42,7 +42,7 @@ struct xe_execlist_exec_queue {
 
        bool has_run;
 
-       struct work_struct fini_async;
+       struct work_struct destroy_async;
 
        enum xe_exec_queue_priority active_priority;
        struct list_head active_link;
diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
index a3f421e2adc03befbf08d149cb4962f010796f01..c30c0e3ccbbb9339847944421efb33b5b9a439ce 100644
@@ -35,8 +35,8 @@ struct xe_guc_exec_queue {
        struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE];
        /** @lr_tdr: long running TDR worker */
        struct work_struct lr_tdr;
-       /** @fini_async: do final fini async from this worker */
-       struct work_struct fini_async;
+       /** @destroy_async: do final destroy async from this worker */
+       struct work_struct destroy_async;
        /** @resume_time: time of last resume */
        u64 resume_time;
        /** @state: GuC specific state for this xe_exec_queue */
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index cafb47711e9b3fab3b4b4197965835197caabe9b..93fc7b290b655f95139346dfc3e269fda3b7ed76 100644
@@ -1277,48 +1277,57 @@ rearm:
        return DRM_GPU_SCHED_STAT_NO_HANG;
 }
 
-static void __guc_exec_queue_fini_async(struct work_struct *w)
+static void guc_exec_queue_fini(struct xe_exec_queue *q)
+{
+       struct xe_guc_exec_queue *ge = q->guc;
+       struct xe_guc *guc = exec_queue_to_guc(q);
+
+       release_guc_id(guc, q);
+       xe_sched_entity_fini(&ge->entity);
+       xe_sched_fini(&ge->sched);
+
+       /*
+        * RCU free due sched being exported via DRM scheduler fences
+        * (timeline name).
+        */
+       kfree_rcu(ge, rcu);
+}
+
+static void __guc_exec_queue_destroy_async(struct work_struct *w)
 {
        struct xe_guc_exec_queue *ge =
-               container_of(w, struct xe_guc_exec_queue, fini_async);
+               container_of(w, struct xe_guc_exec_queue, destroy_async);
        struct xe_exec_queue *q = ge->q;
        struct xe_guc *guc = exec_queue_to_guc(q);
 
        xe_pm_runtime_get(guc_to_xe(guc));
        trace_xe_exec_queue_destroy(q);
 
-       release_guc_id(guc, q);
        if (xe_exec_queue_is_lr(q))
                cancel_work_sync(&ge->lr_tdr);
        /* Confirm no work left behind accessing device structures */
        cancel_delayed_work_sync(&ge->sched.base.work_tdr);
-       xe_sched_entity_fini(&ge->entity);
-       xe_sched_fini(&ge->sched);
 
-       /*
-        * RCU free due sched being exported via DRM scheduler fences
-        * (timeline name).
-        */
-       kfree_rcu(ge, rcu);
        xe_exec_queue_fini(q);
+
        xe_pm_runtime_put(guc_to_xe(guc));
 }
 
-static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
+static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
 {
        struct xe_guc *guc = exec_queue_to_guc(q);
        struct xe_device *xe = guc_to_xe(guc);
 
-       INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
+       INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);
 
        /* We must block on kernel engines so slabs are empty on driver unload */
        if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
-               __guc_exec_queue_fini_async(&q->guc->fini_async);
+               __guc_exec_queue_destroy_async(&q->guc->destroy_async);
        else
-               queue_work(xe->destroy_wq, &q->guc->fini_async);
+               queue_work(xe->destroy_wq, &q->guc->destroy_async);
 }
 
-static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
+static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
 {
        /*
         * Might be done from within the GPU scheduler, need to do async as we
@@ -1327,7 +1336,7 @@ static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
         * this we and don't really care when everything is fini'd, just that it
         * is.
         */
-       guc_exec_queue_fini_async(q);
+       guc_exec_queue_destroy_async(q);
 }
 
 static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
@@ -1341,7 +1350,7 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
        if (exec_queue_registered(q))
                disable_scheduling_deregister(guc, q);
        else
-               __guc_exec_queue_fini(guc, q);
+               __guc_exec_queue_destroy(guc, q);
 }
 
 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
@@ -1574,14 +1583,14 @@ static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
 #define STATIC_MSG_CLEANUP     0
 #define STATIC_MSG_SUSPEND     1
 #define STATIC_MSG_RESUME      2
-static void guc_exec_queue_fini(struct xe_exec_queue *q)
+static void guc_exec_queue_destroy(struct xe_exec_queue *q)
 {
        struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
 
        if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
                guc_exec_queue_add_msg(q, msg, CLEANUP);
        else
-               __guc_exec_queue_fini(exec_queue_to_guc(q), q);
+               __guc_exec_queue_destroy(exec_queue_to_guc(q), q);
 }
 
 static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
@@ -1711,6 +1720,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = {
        .init = guc_exec_queue_init,
        .kill = guc_exec_queue_kill,
        .fini = guc_exec_queue_fini,
+       .destroy = guc_exec_queue_destroy,
        .set_priority = guc_exec_queue_set_priority,
        .set_timeslice = guc_exec_queue_set_timeslice,
        .set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
@@ -1732,7 +1742,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
                if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
                        xe_exec_queue_put(q);
                else if (exec_queue_destroyed(q))
-                       __guc_exec_queue_fini(guc, q);
+                       __guc_exec_queue_destroy(guc, q);
        }
        if (q->guc->suspend_pending) {
                set_exec_queue_suspended(q);
@@ -1989,7 +1999,7 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
        if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
                xe_exec_queue_put(q);
        else
-               __guc_exec_queue_fini(guc, q);
+               __guc_exec_queue_destroy(guc, q);
 }
 
 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)