git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/xe: include all registered queues in TLB invalidation
author: Tangudu Tilak Tirumalesh <tilak.tirumalesh.tangudu@intel.com>
Mon, 8 Jun 2026 16:27:44 +0000 (21:57 +0530)
committer: Matthew Brost <matthew.brost@intel.com>
Wed, 10 Jun 2026 16:33:29 +0000 (09:33 -0700)
Context-based TLB invalidation currently selects only scheduling-active
exec queues via q->ops->active(). During rebind flows, queues may be
suspended (or transitioning through resume) while still owning valid
translations, so they are skipped, which leads to missed TLB
invalidations on LR rebinds.

The underlying issue is a time-of-check to time-of-use (TOCTOU) race:
q->guc->state bits are flipped lock-free from enable_scheduling(),
disable_scheduling{,_deregister}(), the suspend/resume sched-msg handlers,
handle_sched_done(), and guc_exec_queue_stop(); nothing in
send_tlb_inval_ctx_ppgtt() serializes against them, so any state-based
predicate can race.

Include all registered queues so that TLB invalidations are not
missed. This is race-free because list membership on vm->exec_queues.list
is stable under vm->exec_queues.lock, which is held by the caller. The
extra invalidations are harmless and their performance impact is expected
to be minimal. If it does turn out to be a concern, we can come back with
a race-safe way to skip certain queues.

Fixes: 6cdaa5346d6f ("drm/xe: Add context-based invalidation to GuC TLB invalidation backend")
Assisted-by: Claude:claude-opus-4.6
Suggested-by: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
Signed-off-by: Tangudu Tilak Tirumalesh <tilak.tirumalesh.tangudu@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260608162745.338725-2-tilak.tirumalesh.tangudu@intel.com
Signed-off-by: Shuicheng Lin <shuicheng.lin@intel.com>
(cherry picked from commit aa625e1e9f0710e424fe4f0e3f032807df81b5b0)
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
drivers/gpu/drm/xe/xe_guc_tlb_inval.c

index ced58f46f8466679698f1890b95f082db2cced03..cf6d106e6036ace990b1ca9867cbc8af316c9de5 100644 (file)
@@ -255,9 +255,8 @@ static int send_tlb_inval_ctx_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
 #undef EXEC_QUEUE_COUNT_FULL_THRESHOLD
 
        /*
-        * Move exec queues to a temporary list to issue invalidations. The exec
-        * queue must active and a reference must be taken to prevent concurrent
-        * deregistrations.
+        * Move exec queues to a temporary list to issue invalidations. A
+        * reference must be taken to prevent concurrent deregistrations.
         *
         * List modification is safe because we hold 'vm->exec_queues.lock' for
         * reading, which prevents external modifications. Using a per-GT list
@@ -266,7 +265,7 @@ static int send_tlb_inval_ctx_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
         */
        list_for_each_entry_safe(q, next, &vm->exec_queues.list[id],
                                 vm_exec_queue_link) {
-               if (q->ops->active(q) && xe_exec_queue_get_unless_zero(q)) {
+               if (xe_exec_queue_get_unless_zero(q)) {
                        last_q = q;
                        list_move_tail(&q->vm_exec_queue_link, &tlb_inval_list);
                }