drm/xe: Decouple bind queue last fence from TLB invalidations
author    Matthew Brost <matthew.brost@intel.com>    Fri, 31 Oct 2025 23:40:47 +0000 (16:40 -0700)
committer Matthew Brost <matthew.brost@intel.com>    Tue, 4 Nov 2025 16:21:02 +0000 (08:21 -0800)
Separate the bind queue’s last fence to apply exclusively to the bind
job, avoiding unnecessary serialization on prior TLB invalidations.
Preserve correct user fence signaling by merging bind and TLB
invalidation fences later in the pipeline.
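
A condensed sketch of the resulting fence flow (illustrative only: the
helper names match the driver, but in the patch these steps are spread
across xe_pt.c, xe_sync.c and xe_vm.c, and locking plus error handling
are elided):

	struct dma_fence *bind_fence, *ifence, **fences;
	struct dma_fence_array *cf;

	bind_fence = xe_pt_update_ops_run(tile, vops);
	ifence = xe_tlb_inval_job_push(ijob, tile->migrate, bind_fence);

	/* Bind queue idling now serializes only on the bind job. */
	xe_exec_queue_last_fence_set(q, vm, bind_fence);

	/* User fences/syncs still observe both, merged late. */
	fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
	fences[0] = dma_fence_get(bind_fence);
	fences[1] = dma_fence_get(ifence);
	cf = dma_fence_array_create(2, fences, dma_fence_context_alloc(1),
				    1, false);	/* array takes ownership */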

v3:
 - Fix lockdep assert for migrate queues (CI)
 - Use individual dma fence contexts for array out fences (Testing; see sketch below)
 - Don't set last fence with arrays (Testing)
 - Move TLB invalid last fence under migrate lock (Testing)
 - Don't set queue last for migrate queues (Testing)
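
The fence-context item above boils down to the following before/after
(a sketch distilled from the xe_sync.c and xe_vm.c hunks below):

	/* Before: one context and seqno shared per VM, which orders
	 * unrelated composite fences against each other. */
	cf = dma_fence_array_create(num_fence, fences,
				    vm->composite_fence_ctx,
				    vm->composite_fence_seqno++, false);

	/* After: a fresh context per array with a fixed seqno of 1,
	 * so each out-fence array stands alone. */
	cf = dma_fence_array_create(num_fence, fences,
				    dma_fence_context_alloc(1), 1, false);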

Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6047
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Link: https://patch.msgid.link/20251031234050.3043507-4-matthew.brost@intel.com
drivers/gpu/drm/xe/xe_pt.c
drivers/gpu/drm/xe/xe_sync.c
drivers/gpu/drm/xe/xe_tlb_inval_job.c
drivers/gpu/drm/xe/xe_tlb_inval_job.h
drivers/gpu/drm/xe/xe_vm.c
drivers/gpu/drm/xe/xe_vm_types.h

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 7c5bca78c8bf34a4b4dd5bb0ae5a8cd5b5c331a5..8ef9bfcbd9979ac0495f90428d944f205b5cb706 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -3,8 +3,6 @@
  * Copyright © 2022 Intel Corporation
  */
 
-#include <linux/dma-fence-array.h>
-
 #include "xe_pt.h"
 
 #include "regs/xe_gtt_defs.h"
@@ -2359,10 +2357,9 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
        struct xe_vm *vm = vops->vm;
        struct xe_vm_pgtable_update_ops *pt_update_ops =
                &vops->pt_update_ops[tile->id];
-       struct dma_fence *fence, *ifence, *mfence;
+       struct xe_exec_queue *q = pt_update_ops->q;
+       struct dma_fence *fence, *ifence = NULL, *mfence = NULL;
        struct xe_tlb_inval_job *ijob = NULL, *mjob = NULL;
-       struct dma_fence **fences = NULL;
-       struct dma_fence_array *cf = NULL;
        struct xe_range_fence *rfence;
        struct xe_vma_op *op;
        int err = 0, i;
@@ -2390,15 +2387,14 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
 #endif
 
        if (pt_update_ops->needs_invalidation) {
-               struct xe_exec_queue *q = pt_update_ops->q;
                struct xe_dep_scheduler *dep_scheduler =
                        to_dep_scheduler(q, tile->primary_gt);
 
                ijob = xe_tlb_inval_job_create(q, &tile->primary_gt->tlb_inval,
-                                              dep_scheduler,
+                                              dep_scheduler, vm,
                                               pt_update_ops->start,
                                               pt_update_ops->last,
-                                              vm->usm.asid);
+                                              XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
                if (IS_ERR(ijob)) {
                        err = PTR_ERR(ijob);
                        goto kill_vm_tile1;
@@ -2410,26 +2406,15 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
 
                        mjob = xe_tlb_inval_job_create(q,
                                                       &tile->media_gt->tlb_inval,
-                                                      dep_scheduler,
+                                                      dep_scheduler, vm,
                                                       pt_update_ops->start,
                                                       pt_update_ops->last,
-                                                      vm->usm.asid);
+                                                      XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT);
                        if (IS_ERR(mjob)) {
                                err = PTR_ERR(mjob);
                                goto free_ijob;
                        }
                        update.mjob = mjob;
-
-                       fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
-                       if (!fences) {
-                               err = -ENOMEM;
-                               goto free_ijob;
-                       }
-                       cf = dma_fence_array_alloc(2);
-                       if (!cf) {
-                               err = -ENOMEM;
-                               goto free_ijob;
-                       }
                }
        }
 
@@ -2460,31 +2445,12 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
                                  pt_update_ops->last, fence))
                dma_fence_wait(fence, false);
 
-       /* tlb invalidation must be done before signaling unbind/rebind */
-       if (ijob) {
-               struct dma_fence *__fence;
-
+       if (ijob)
                ifence = xe_tlb_inval_job_push(ijob, tile->migrate, fence);
-               __fence = ifence;
+       if (mjob)
+               mfence = xe_tlb_inval_job_push(mjob, tile->migrate, fence);
 
-               if (mjob) {
-                       fences[0] = ifence;
-                       mfence = xe_tlb_inval_job_push(mjob, tile->migrate,
-                                                      fence);
-                       fences[1] = mfence;
-
-                       dma_fence_array_init(cf, 2, fences,
-                                            vm->composite_fence_ctx,
-                                            vm->composite_fence_seqno++,
-                                            false);
-                       __fence = &cf->base;
-               }
-
-               dma_fence_put(fence);
-               fence = __fence;
-       }
-
-       if (!mjob) {
+       if (!mjob && !ijob) {
                dma_resv_add_fence(xe_vm_resv(vm), fence,
                                   pt_update_ops->wait_vm_bookkeep ?
                                   DMA_RESV_USAGE_KERNEL :
@@ -2492,6 +2458,14 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
 
                list_for_each_entry(op, &vops->list, link)
                        op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL);
+       } else if (ijob && !mjob) {
+               dma_resv_add_fence(xe_vm_resv(vm), ifence,
+                                  pt_update_ops->wait_vm_bookkeep ?
+                                  DMA_RESV_USAGE_KERNEL :
+                                  DMA_RESV_USAGE_BOOKKEEP);
+
+               list_for_each_entry(op, &vops->list, link)
+                       op_commit(vops->vm, tile, pt_update_ops, op, ifence, NULL);
        } else {
                dma_resv_add_fence(xe_vm_resv(vm), ifence,
                                   pt_update_ops->wait_vm_bookkeep ?
@@ -2511,16 +2485,23 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
        if (pt_update_ops->needs_svm_lock)
                xe_svm_notifier_unlock(vm);
 
+       /*
+        * The last fence is only used for bind queue idling; migrate
+        * queues are not exposed to user space.
+        */
+       if (!(q->flags & EXEC_QUEUE_FLAG_MIGRATE))
+               xe_exec_queue_last_fence_set(q, vm, fence);
+
        xe_tlb_inval_job_put(mjob);
        xe_tlb_inval_job_put(ijob);
+       dma_fence_put(ifence);
+       dma_fence_put(mfence);
 
        return fence;
 
 free_rfence:
        kfree(rfence);
 free_ijob:
-       kfree(cf);
-       kfree(fences);
        xe_tlb_inval_job_put(mjob);
        xe_tlb_inval_job_put(ijob);
 kill_vm_tile1:
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index d48ab7b32ca51c7408ce752ee260eb5dc08ac977..df7ca349398b639d629c7ceb7d82d92f45553095 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -14,7 +14,7 @@
 #include <drm/drm_syncobj.h>
 #include <uapi/drm/xe_drm.h>
 
-#include "xe_device_types.h"
+#include "xe_device.h"
 #include "xe_exec_queue.h"
 #include "xe_macros.h"
 #include "xe_sched_job_types.h"
@@ -297,26 +297,67 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
        struct dma_fence **fences = NULL;
        struct dma_fence_array *cf = NULL;
        struct dma_fence *fence;
-       int i, num_in_fence = 0, current_fence = 0;
+       int i, num_fence = 0, current_fence = 0;
 
        lockdep_assert_held(&vm->lock);
 
        /* Count in-fences */
        for (i = 0; i < num_sync; ++i) {
                if (sync[i].fence) {
-                       ++num_in_fence;
+                       ++num_fence;
                        fence = sync[i].fence;
                }
        }
 
        /* Easy case... */
-       if (!num_in_fence) {
+       if (!num_fence) {
+               if (q->flags & EXEC_QUEUE_FLAG_VM) {
+                       struct xe_exec_queue *__q;
+                       struct xe_tile *tile;
+                       u8 id;
+
+                       for_each_tile(tile, vm->xe, id)
+                               num_fence += (1 + XE_MAX_GT_PER_TILE);
+
+                       fences = kmalloc_array(num_fence, sizeof(*fences),
+                                              GFP_KERNEL);
+                       if (!fences)
+                               return ERR_PTR(-ENOMEM);
+
+                       fences[current_fence++] =
+                               xe_exec_queue_last_fence_get(q, vm);
+                       for_each_tlb_inval(i)
+                               fences[current_fence++] =
+                                       xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
+                       list_for_each_entry(__q, &q->multi_gt_list,
+                                           multi_gt_link) {
+                               fences[current_fence++] =
+                                       xe_exec_queue_last_fence_get(__q, vm);
+                               for_each_tlb_inval(i)
+                                       fences[current_fence++] =
+                                               xe_exec_queue_tlb_inval_last_fence_get(__q, vm, i);
+                       }
+
+                       xe_assert(vm->xe, current_fence == num_fence);
+                       cf = dma_fence_array_create(num_fence, fences,
+                                                   dma_fence_context_alloc(1),
+                                                   1, false);
+                       if (!cf)
+                               goto err_out;
+
+                       return &cf->base;
+               }
+
                fence = xe_exec_queue_last_fence_get(q, vm);
                return fence;
        }
 
-       /* Create composite fence */
-       fences = kmalloc_array(num_in_fence + 1, sizeof(*fences), GFP_KERNEL);
+       /*
+        * Create composite fence - FIXME - the below code doesn't work. This is
+        * unused in Mesa so we are ok for the moment. Perhaps we just disable
+        * this entire code path if number of in fences != 0.
+        */
+       fences = kmalloc_array(num_fence + 1, sizeof(*fences), GFP_KERNEL);
        if (!fences)
                return ERR_PTR(-ENOMEM);
        for (i = 0; i < num_sync; ++i) {
@@ -326,14 +367,10 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
                }
        }
        fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm);
-       cf = dma_fence_array_create(num_in_fence, fences,
-                                   vm->composite_fence_ctx,
-                                   vm->composite_fence_seqno++,
-                                   false);
-       if (!cf) {
-               --vm->composite_fence_seqno;
+       cf = dma_fence_array_create(num_fence, fences,
+                                   dma_fence_context_alloc(1), 1, false);
+       if (!cf)
                goto err_out;
-       }
 
        return &cf->base;
 
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
index 492def04a55953390fb43b8cf083e5689d24121e..1ae0dec2cf316128937d4dcd3bd54ad89311fe87 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_job.c
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
@@ -12,6 +12,7 @@
 #include "xe_tlb_inval_job.h"
 #include "xe_migrate.h"
 #include "xe_pm.h"
+#include "xe_vm.h"
 
 /** struct xe_tlb_inval_job - TLB invalidation job */
 struct xe_tlb_inval_job {
@@ -21,6 +22,8 @@ struct xe_tlb_inval_job {
        struct xe_tlb_inval *tlb_inval;
        /** @q: exec queue issuing the invalidate */
        struct xe_exec_queue *q;
+       /** @vm: VM which TLB invalidation is being issued for */
+       struct xe_vm *vm;
        /** @refcount: ref count of this job */
        struct kref refcount;
        /**
@@ -32,8 +35,8 @@ struct xe_tlb_inval_job {
        u64 start;
        /** @end: End address to invalidate */
        u64 end;
-       /** @asid: Address space ID to invalidate */
-       u32 asid;
+       /** @type: GT type */
+       int type;
        /** @fence_armed: Fence has been armed */
        bool fence_armed;
 };
@@ -46,7 +49,7 @@ static struct dma_fence *xe_tlb_inval_job_run(struct xe_dep_job *dep_job)
                container_of(job->fence, typeof(*ifence), base);
 
        xe_tlb_inval_range(job->tlb_inval, ifence, job->start,
-                          job->end, job->asid);
+                          job->end, job->vm->usm.asid);
 
        return job->fence;
 }
@@ -70,9 +73,10 @@ static const struct xe_dep_job_ops dep_job_ops = {
  * @q: exec queue issuing the invalidate
  * @tlb_inval: TLB invalidation client
  * @dep_scheduler: Dependency scheduler for job
+ * @vm: VM which TLB invalidation is being issued for
  * @start: Start address to invalidate
  * @end: End address to invalidate
- * @asid: Address space ID to invalidate
+ * @type: GT type
  *
  * Create a TLB invalidation job and initialize internal fields. The caller is
  * responsible for releasing the creation reference.
@@ -81,8 +85,8 @@ static const struct xe_dep_job_ops dep_job_ops = {
  */
 struct xe_tlb_inval_job *
 xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
-                       struct xe_dep_scheduler *dep_scheduler, u64 start,
-                       u64 end, u32 asid)
+                       struct xe_dep_scheduler *dep_scheduler,
+                       struct xe_vm *vm, u64 start, u64 end, int type)
 {
        struct xe_tlb_inval_job *job;
        struct drm_sched_entity *entity =
@@ -90,19 +94,24 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
        struct xe_tlb_inval_fence *ifence;
        int err;
 
+       xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+                 type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+
        job = kmalloc(sizeof(*job), GFP_KERNEL);
        if (!job)
                return ERR_PTR(-ENOMEM);
 
        job->q = q;
+       job->vm = vm;
        job->tlb_inval = tlb_inval;
        job->start = start;
        job->end = end;
-       job->asid = asid;
        job->fence_armed = false;
        job->dep.ops = &dep_job_ops;
+       job->type = type;
        kref_init(&job->refcount);
        xe_exec_queue_get(q);   /* Pairs with put in xe_tlb_inval_job_destroy */
+       xe_vm_get(vm);          /* Pairs with put in xe_tlb_inval_job_destroy */
 
        ifence = kmalloc(sizeof(*ifence), GFP_KERNEL);
        if (!ifence) {
@@ -124,6 +133,7 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
 err_fence:
        kfree(ifence);
 err_job:
+       xe_vm_put(vm);
        xe_exec_queue_put(q);
        kfree(job);
 
@@ -138,6 +148,7 @@ static void xe_tlb_inval_job_destroy(struct kref *ref)
                container_of(job->fence, typeof(*ifence), base);
        struct xe_exec_queue *q = job->q;
        struct xe_device *xe = gt_to_xe(q->gt);
+       struct xe_vm *vm = job->vm;
 
        if (!job->fence_armed)
                kfree(ifence);
@@ -147,6 +158,7 @@ static void xe_tlb_inval_job_destroy(struct kref *ref)
 
        drm_sched_job_cleanup(&job->dep.drm);
        kfree(job);
+       xe_vm_put(vm);          /* Pairs with get from xe_tlb_inval_job_create */
        xe_exec_queue_put(q);   /* Pairs with get from xe_tlb_inval_job_create */
        xe_pm_runtime_put(xe);  /* Pairs with get from xe_tlb_inval_job_create */
 }
@@ -231,6 +243,11 @@ struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job,
        dma_fence_get(&job->dep.drm.s_fence->finished);
        drm_sched_entity_push_job(&job->dep.drm);
 
+       /* Let the upper layers fish this out */
+       xe_exec_queue_tlb_inval_last_fence_set(job->q, job->vm,
+                                              &job->dep.drm.s_fence->finished,
+                                              job->type);
+
        xe_migrate_job_unlock(m, job->q);
 
        /*
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.h b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
index e63edcb26b505d01323a0f5566a92d7cc238c793..4d6df1a6c6cae361966a62a4ca75f8cb6cd2e4a5 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_job.h
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
 struct dma_fence;
 struct xe_dep_scheduler;
 struct xe_exec_queue;
+struct xe_migrate;
 struct xe_tlb_inval;
 struct xe_tlb_inval_job;
-struct xe_migrate;
+struct xe_vm;
 
 struct xe_tlb_inval_job *
 xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
                        struct xe_dep_scheduler *dep_scheduler,
-                       u64 start, u64 end, u32 asid);
+                       struct xe_vm *vm, u64 start, u64 end, int type);
 
 int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job);
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 7343f34757d249e5b139149f110dccd5270758c7..45cbe5f05107b165ea8a2845f3bf94dec94a2f06 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1623,9 +1623,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
                }
        }
 
-       if (number_tiles > 1)
-               vm->composite_fence_ctx = dma_fence_context_alloc(1);
-
        if (xef && xe->info.has_asid) {
                u32 asid;
 
@@ -3107,20 +3104,26 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
        struct dma_fence *fence = NULL;
        struct dma_fence **fences = NULL;
        struct dma_fence_array *cf = NULL;
-       int number_tiles = 0, current_fence = 0, err;
+       int number_tiles = 0, current_fence = 0, n_fence = 0, err;
        u8 id;
 
        number_tiles = vm_ops_setup_tile_args(vm, vops);
        if (number_tiles == 0)
                return ERR_PTR(-ENODATA);
 
-       if (number_tiles > 1) {
-               fences = kmalloc_array(number_tiles, sizeof(*fences),
-                                      GFP_KERNEL);
-               if (!fences) {
-                       fence = ERR_PTR(-ENOMEM);
-                       goto err_trace;
-               }
+       for_each_tile(tile, vm->xe, id)
+               n_fence += (1 + XE_MAX_GT_PER_TILE);
+
+       fences = kmalloc_array(n_fence, sizeof(*fences), GFP_KERNEL);
+       if (!fences) {
+               fence = ERR_PTR(-ENOMEM);
+               goto err_trace;
+       }
+
+       cf = dma_fence_array_alloc(n_fence);
+       if (!cf) {
+               fence = ERR_PTR(-ENOMEM);
+               goto err_out;
        }
 
        for_each_tile(tile, vm->xe, id) {
@@ -3137,29 +3140,30 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
        trace_xe_vm_ops_execute(vops);
 
        for_each_tile(tile, vm->xe, id) {
+               struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q;
+               int i;
+
+               fence = NULL;
                if (!vops->pt_update_ops[id].num_ops)
-                       continue;
+                       goto collect_fences;
 
                fence = xe_pt_update_ops_run(tile, vops);
                if (IS_ERR(fence))
                        goto err_out;
 
-               if (fences)
-                       fences[current_fence++] = fence;
+collect_fences:
+               fences[current_fence++] = fence ?: dma_fence_get_stub();
+               xe_migrate_job_lock(tile->migrate, q);
+               for_each_tlb_inval(i)
+                       fences[current_fence++] =
+                               xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
+               xe_migrate_job_unlock(tile->migrate, q);
        }
 
-       if (fences) {
-               cf = dma_fence_array_create(number_tiles, fences,
-                                           vm->composite_fence_ctx,
-                                           vm->composite_fence_seqno++,
-                                           false);
-               if (!cf) {
-                       --vm->composite_fence_seqno;
-                       fence = ERR_PTR(-ENOMEM);
-                       goto err_out;
-               }
-               fence = &cf->base;
-       }
+       xe_assert(vm->xe, current_fence == n_fence);
+       dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1),
+                            1, false);
+       fence = &cf->base;
 
        for_each_tile(tile, vm->xe, id) {
                if (!vops->pt_update_ops[id].num_ops)
@@ -3220,7 +3224,6 @@ static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
                                   struct dma_fence *fence)
 {
-       struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
        struct xe_user_fence *ufence;
        struct xe_vma_op *op;
        int i;
@@ -3241,7 +3244,6 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
        if (fence) {
                for (i = 0; i < vops->num_syncs; i++)
                        xe_sync_entry_signal(vops->syncs + i, fence);
-               xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
        }
 }
 
@@ -3435,19 +3437,19 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
                                       struct xe_sync_entry *syncs,
                                       int num_syncs)
 {
-       struct dma_fence *fence;
+       struct dma_fence *fence = NULL;
        int i, err = 0;
 
-       fence = xe_sync_in_fence_get(syncs, num_syncs,
-                                    to_wait_exec_queue(vm, q), vm);
-       if (IS_ERR(fence))
-               return PTR_ERR(fence);
+       if (num_syncs) {
+               fence = xe_sync_in_fence_get(syncs, num_syncs,
+                                            to_wait_exec_queue(vm, q), vm);
+               if (IS_ERR(fence))
+                       return PTR_ERR(fence);
 
-       for (i = 0; i < num_syncs; i++)
-               xe_sync_entry_signal(&syncs[i], fence);
+               for (i = 0; i < num_syncs; i++)
+                       xe_sync_entry_signal(&syncs[i], fence);
+       }
 
-       xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
-                                    fence);
        dma_fence_put(fence);
 
        return err;
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 830ed7b05c27eb60d69dc1c45e36a5be7f40b147..9043bc4a381cb008d97364bf150847197770d497 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -221,11 +221,6 @@ struct xe_vm {
 #define XE_VM_FLAG_GSC                 BIT(8)
        unsigned long flags;
 
-       /** @composite_fence_ctx: context composite fence */
-       u64 composite_fence_ctx;
-       /** @composite_fence_seqno: seqno for composite fence */
-       u32 composite_fence_seqno;
-
        /**
         * @lock: outer most lock, protects objects of anything attached to this
         * VM